Currently, some environmental data and 1 rep at station A surface are missing. Also, station H shows some incongruence with the other reps, but I think those are just badly filtered samples and there is nothing we can do about it.

# Load the NBP1910 environmental / feeding data set from its CSV file.
ant_community <- read.csv(
  file = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/NBP1910_envdata_forFEED.csv"
)

Need to tidy data

# Show column names, types and leading values of the loaded data frame.
str(object = ant_community)
## 'data.frame':    87 obs. of  52 variables:
##  $ X               : chr  "A_Surf_1" "A_Surf_2" "A_Surf_3" "A_DCM_1" ...
##  $ sample          : chr  "A_Surf_1" "A_Surf_2" "A_Surf_3" "A_DCM_1" ...
##  $ Rep             : chr  "A" "B" "C" "A" ...
##  $ date            : chr  "6-Nov" "6-Nov" "6-Nov" "6-Nov" ...
##  $ Lat_deg         : int  64 64 64 64 64 64 64 64 64 64 ...
##  $ Lat_min         : num  32.4 32.4 32.4 32.4 32.4 32.4 53.7 53.7 53.7 53.7 ...
##  $ Lat             : num  64.5 64.5 64.5 64.5 64.5 ...
##  $ Long_deg        : int  62 62 62 62 62 62 64 64 64 64 ...
##  $ Long_min        : num  22.4 22.4 22.4 22.4 22.4 22.4 12.1 12.1 12.1 12.1 ...
##  $ Long            : num  62.4 62.4 62.4 62.4 62.4 ...
##  $ bottom          : int  740 740 740 740 740 740 799 799 799 799 ...
##  $ ice             : num  0 0 0 0 0 0 1 1 1 1 ...
##  $ airT            : num  -0.5 -0.5 -0.5 -0.5 -0.5 -0.5 -1.4 -1.4 -1.4 -1.4 ...
##  $ PAR0            : chr  "nd" "nd" "nd" "nd" ...
##  $ station         : chr  "A" "A" "A" "A" ...
##  $ depth           : chr  "Surf" "Surf" "Surf" "DCM" ...
##  $ depth_m         : num  1.4 1.4 1.4 45.2 45.2 45.2 1 1 1 30.2 ...
##  $ size_num        : num  0.2 0.2 20 0.2 0.2 20 0.2 5 20 0.2 ...
##  $ size            : chr  "pico" "pico" "micro" "pico" ...
##  $ Chla            : num  0.5 0.5 0.5 0.52 0.52 0.52 0.43 0.43 0.43 0.22 ...
##  $ prDM            : int  1 1 1 46 46 46 1 1 1 30 ...
##  $ depSM           : num  0.991 0.991 0.991 45.545 45.545 ...
##  $ waterT1         : num  -0.631 -0.631 -0.631 -0.403 -0.403 ...
##  $ conductivity1   : num  27.8 27.8 27.8 28.1 28.1 ...
##  $ waterT2         : num  -0.625 -0.625 -0.625 -0.401 -0.401 ...
##  $ conductivity2   : num  27.8 27.8 27.8 28.1 28.1 ...
##  $ oxygen1         : num  7.01 7.01 7.01 6.23 6.23 ...
##  $ oxygen2         : num  7.08 7.08 7.08 6.32 6.32 ...
##  $ fluorescence    : num  0.151 0.151 0.151 0.739 0.739 ...
##  $ beamTrans       : num  97.9 97.9 97.9 98.6 98.6 ...
##  $ PAR1            : num  965.3 965.3 965.3 13.9 13.9 ...
##  $ PAR2            : num  2895.8 2895.8 2895.8 41.8 41.8 ...
##  $ latitude        : num  -64.5 -64.5 -64.5 -64.5 -64.5 ...
##  $ longitude       : num  -62.4 -62.4 -62.4 -62.4 -62.4 ...
##  $ timeJ           : num  310 310 310 310 310 ...
##  $ altM            : num  100.9 100.9 100.9 99.5 99.5 ...
##  $ spar            : num  920 920 920 1085 1085 ...
##  $ timeS           : num  1088 1088 1088 835 835 ...
##  $ scan            : int  26122 26122 26122 20045 20045 20045 58590 58590 58590 55872 ...
##  $ salinity1       : num  34 34 34 34.2 34.2 ...
##  $ salinity2       : num  34 34 34 34.2 34.2 ...
##  $ oxygenSaturation: num  8.2 8.2 8.2 8.14 8.14 ...
##  $ nbin            : int  72 72 72 65 65 65 38 38 38 41 ...
##  $ bprod           : num  4.07 4.07 4.07 3.64 3.64 3.64 7.85 7.85 7.85 1.68 ...
##  $ pprod_Sun       : chr  "nd" "nd" "nd" "nd" ...
##  $ pprod_PAR       : chr  "nd" "nd" "nd" "nd" ...
##  $ pprod_Sun_20    : chr  "nd" "nd" "nd" "nd" ...
##  $ Pprod_PAR_20    : chr  "nd" "nd" "nd" "nd" ...
##  $ Time_feeding    : int  30 30 30 30 30 30 30 30 30 30 ...
##  $ PNAN.mL         : num  1089 1415 586 1452 2234 ...
##  $ MNAN.mL         : num  37.2 18.6 27.9 167.6 44.7 ...
##  $ HNAN.mL         : num  382 316 140 261 223 ...
#Need to make some character variables factors
# Rep, station and depth are read as character; convert all three in one
# assignment so they can be used as grouping variables.
ant_community[c("Rep", "station", "depth")] <- lapply(
  ant_community[c("Rep", "station", "depth")],
  as.factor
)

Now that we have some proper factors I will group and summarise everything to get a proper plot and a raw boxplot for our data while removing the one bad sample with no data.

# Drop the single row that is replicate C of station A at the DCM depth
# (kept: every row that differs in at least one of the three fields).
ant_community_no_NA <- filter(
  ant_community,
  Rep != "C" | station != "A" | depth != "DCM"
)

# One boxplot per abundance column: stations on the x axis, filled by depth.
PNAN_plot <- ggplot(
  ant_community_no_NA,
  aes(x = station, y = PNAN.mL, fill = depth)
) +
  geom_boxplot()

MNAN_plot <- ggplot(
  ant_community_no_NA,
  aes(x = station, y = MNAN.mL, fill = depth)
) +
  geom_boxplot()

HNAN_plot <- ggplot(
  ant_community_no_NA,
  aes(x = station, y = HNAN.mL, fill = depth)
) +
  geom_boxplot()

# Draw the three panels together.
grid.arrange(PNAN_plot, MNAN_plot, HNAN_plot)

# Load the updated version of the data set.
NBP_bacterivory_df <- read.csv(
  file = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/NBP1910_envdata_forFEED_updated.csv"
)

# Drop replicate C of station A at the DCM depth.
NBP_bacterivory_df_noNA <- filter(
  NBP_bacterivory_df,
  Rep != "C" | station != "A" | depth != "DCM"
)

# Floor negative microsphere values at zero for both feeding columns.
NBP_bacterivory_df_noNA$usphere_mixo <- replace(
  NBP_bacterivory_df_noNA$usphere_mixo,
  NBP_bacterivory_df_noNA$usphere_mixo < 0,
  0
)

NBP_bacterivory_df_noNA$usphere_hetero <- replace(
  NBP_bacterivory_df_noNA$usphere_hetero,
  NBP_bacterivory_df_noNA$usphere_hetero < 0,
  0
)

# Derive community-composition and bacterivory columns.
# Later columns reuse earlier ones, so the order inside mutate() matters.
NBP_bacterivory_finaldf <- dplyr::mutate(
  NBP_bacterivory_df_noNA,
  # Total abundance and each group's share of it, in percent.
  TNAN = PNAN_mL + MNAN_mL + HNAN_mL,
  PNAN_percent = PNAN_mL / TNAN * 100,
  MNAN_percent = MNAN_mL / TNAN * 100,
  HNAN_percent = HNAN_mL / TNAN * 100,
  # Bacteria per microsphere, used to scale microsphere uptake to bacteria.
  bac_usphere_ratio = Bact_mL / usphere_mL,
  # Time_feeding / 60 presumably converts minutes to hours - TODO confirm.
  MNAN_bacterivory = usphere_mixo / (Time_feeding / 60) * bac_usphere_ratio,
  HNAN_bacterivory = usphere_hetero / (Time_feeding / 60) * bac_usphere_ratio,
  # Group-level removal relative to Bact_mL, in percent.
  MNAN_SS = MNAN_bacterivory / Bact_mL * MNAN_mL * 100,
  HNAN_SS = HNAN_bacterivory / Bact_mL * HNAN_mL * 100,
  MNAN_SS_perday = MNAN_SS * 24,
  HNAN_SS_perday = HNAN_SS * 24,
  # Each group's share of the combined removal (0/0 gives NaN).
  Total_SS = MNAN_SS + HNAN_SS,
  MNAN_SS_percent = MNAN_SS / Total_SS * 100,
  HNAN_SS_percent = HNAN_SS / Total_SS * 100
)

# Convert the grouping columns to factors.
NBP_bacterivory_finaldf$Rep <- as.factor(NBP_bacterivory_finaldf$Rep)
NBP_bacterivory_finaldf$station <- as.factor(NBP_bacterivory_finaldf$station)
NBP_bacterivory_finaldf$depth <- as.factor(NBP_bacterivory_finaldf$depth)

# The percent columns are NaN wherever Total_SS is 0 (0/0). Replace those
# NaNs with 0. is.nan() tests the numeric value directly instead of relying
# on coercing the column to character for a comparison with "NaN".
NBP_bacterivory_finaldf$MNAN_SS_percent[is.nan(NBP_bacterivory_finaldf$MNAN_SS_percent)] <- 0
NBP_bacterivory_finaldf$HNAN_SS_percent[is.nan(NBP_bacterivory_finaldf$HNAN_SS_percent)] <- 0
#Percent functional group populations
# Boxplots of each group's share of total abundance, by station and depth.
PNAN_percent_plot <- ggplot(NBP_bacterivory_finaldf)+
  geom_boxplot(aes(x = station, y = PNAN_percent, fill = depth))

MNAN_percent_plot <- ggplot(NBP_bacterivory_finaldf)+
  geom_boxplot(aes(x = station, y = MNAN_percent, fill = depth))

HNAN_percent_plot <- ggplot(NBP_bacterivory_finaldf)+
  geom_boxplot(aes(x = station, y = HNAN_percent, fill = depth))

# The mutate() that builds NBP_bacterivory_finaldf creates TNAN but no
# TNAN_percent column, so this plot maps the total abundance itself.
# NOTE(review): confirm TNAN is the intended variable; this plot is not
# part of the arranged figure below.
TNAN_percent_plot <- ggplot(NBP_bacterivory_finaldf)+
  geom_boxplot(aes(x = station, y = TNAN, fill = depth))

grid.arrange(PNAN_percent_plot, MNAN_percent_plot, HNAN_percent_plot)

#SS per day
# Boxplots of the *_SS_perday columns by station, filled by depth.

MNAN_SS_perday_plot <- ggplot(
  NBP_bacterivory_finaldf,
  aes(x = station, y = MNAN_SS_perday, fill = depth)
) +
  geom_boxplot()

HNAN_SS_perday_plot <- ggplot(
  NBP_bacterivory_finaldf,
  aes(x = station, y = HNAN_SS_perday, fill = depth)
) +
  geom_boxplot()

grid.arrange(MNAN_SS_perday_plot, HNAN_SS_perday_plot)

#Percentage of total ingestion
# Both panels exclude replicate B of station Q at the surface.
# NOTE(review): the lower y limit is 9, so values below 9 are dropped from the
# boxplots; the follow-up plot uses 0 - confirm 9 is intended.
MNAN_SS_percent_plot <- ggplot(
  filter(
    NBP_bacterivory_finaldf,
    Rep != "B" | station != "Q" | depth != "Surf"
  ),
  aes(x = station, y = MNAN_SS_percent, fill = depth)
) +
  geom_boxplot() +
  ylim(9, 100)

HNAN_SS_percent_plot <- ggplot(
  filter(
    NBP_bacterivory_finaldf,
    Rep != "B" | station != "Q" | depth != "Surf"
  ),
  aes(x = station, y = HNAN_SS_percent, fill = depth)
) +
  geom_boxplot() +
  ylim(9, 100)

grid.arrange(MNAN_SS_percent_plot, HNAN_SS_percent_plot)
## Warning: Removed 16 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 26 rows containing non-finite values (`stat_boxplot()`).

# HNAN share of total ingestion on the full 0-100 scale, again without
# replicate B of station Q at the surface.
ggplot(
  filter(
    NBP_bacterivory_finaldf,
    Rep != "B" | station != "Q" | depth != "Surf"
  ),
  aes(x = station, y = HNAN_SS_percent, fill = depth)
) +
  geom_boxplot() +
  ylim(0, 100)

#removing redundant column "X" before pca analysis
# Also drops the Rep:date, PAR0:depth and pprod_Sun:Pprod_PAR_20 column ranges.
NBP_bacterivory_finaldf_forPCA <- select(
  NBP_bacterivory_finaldf,
  -X,
  -(Rep:date),
  -(PAR0:depth),
  -(pprod_Sun:Pprod_PAR_20)
)

# Use the sample identifiers as row names, then remove the column itself so
# only the remaining variables enter the ordination.
row.names(NBP_bacterivory_finaldf_forPCA) <- NBP_bacterivory_finaldf_forPCA[["sample"]]

NBP_bacterivory_finaldf_forPCA_nosamplecol <- select(
  NBP_bacterivory_finaldf_forPCA,
  -sample
)

# Scaled ordination via vegan::rda() with no constraining variables.
NBP_bacterivory_pca <- vegan::rda(
  NBP_bacterivory_finaldf_forPCA_nosamplecol,
  scale = TRUE
)


biplot(
  NBP_bacterivory_pca,
  display = c("sites", "species"),
  type = c("text", "points")
)

#By removing negatives for 0s we now get NaN values for our percentages because
# Total_SS is 0 whenever both MNAN_SS and HNAN_SS are 0, so the percent
# calculation becomes 0/0.

# Centered and scaled PCA on the same table used for the rda() ordination.
NBP_bacterivory_pca_prcomp <- prcomp(
  NBP_bacterivory_finaldf_forPCA_nosamplecol,
  center = TRUE,
  scale. = TRUE
)

# Biplot with sample labels, loading labels and colouring by the Group column.
autoplot(
  NBP_bacterivory_pca_prcomp,
  data = NBP_bacterivory_finaldf,
  label = TRUE,
  label.size = 2.5,
  loadings.label = TRUE,
  loadings.label.size = 3,
  loadings.label.colour = "black",
  colour = "Group",
  label.repel = TRUE
)
## Warning: `select_()` was deprecated in dplyr 0.7.0.
## ℹ Please use `select()` instead.
## ℹ The deprecated feature was likely used in the dplyr package.
##   Please report the issue at <https://github.com/tidyverse/dplyr/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: ggrepel: 14 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

For some of the stations, the calculated feeding is 0 for both HNAN and MNAN, due to the high background in the T0s caused by doubling the microspheres added and increasing the volume filtered. This creates a situation where the percent calculation ends up as 0/0, i.e. NaN. For initial analysis purposes I will change all NaNs into 0s. I will revisit the slides with negative values to make sure there aren’t any typos or slides I still need to reread.

The lack of feeding can be explained by the bacterivory occurring in the larger microplankton size fraction.

Need to use this data in conjunction with brdu experiments.

Now there are some issues with this first PCA analysis. ALL variables were used including all calculated and some biased variables (looking at you usphere concentration). This next chunk is to narrow down and remove some covariates and irrelevant variables.

# Reduced variable set for the refined PCA: drops the sample:Long columns,
# the second-sensor duplicates, Time_feeding, the microsphere columns and
# several derived percentage / rate columns.
NBP_bacterivory_finaldf_forPCA_2 <- select(
  NBP_bacterivory_finaldf_forPCA,
  -(sample:Long),
  -(waterT2:conductivity2),
  -oxygen2,
  -PAR2,
  -salinity2,
  -Time_feeding,
  -(usphere_mL:usphere_hetero),
  -(PNAN_percent:bac_usphere_ratio),
  -(MNAN_bacterivory:HNAN_SS),
  -(MNAN_SS_percent:HNAN_SS_percent)
)

NBP_all_bacterivory_functional_PCA <- prcomp(
  NBP_bacterivory_finaldf_forPCA_2,
  center = TRUE,
  scale. = TRUE
)

# Same call but with `scale` instead of `scale.`.
# NOTE: `scale` partially matches prcomp()'s `scale.` argument, so both calls
# request the same scaled PCA.
NBP_all_bacterivory_functional_PCA_test <- prcomp(
  NBP_bacterivory_finaldf_forPCA_2,
  center = TRUE,
  scale = TRUE
)
#Testing if scale or scale. matter

autoplot(
  NBP_all_bacterivory_functional_PCA,
  data = NBP_bacterivory_finaldf,
  label = TRUE,
  label.size = 2.5,
  loadings.label = TRUE,
  loadings.label.size = 3,
  loadings.label.colour = "black",
  colour = "Group",
  label.repel = TRUE
)
## Warning: ggrepel: 35 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

<img src='Ant_community_feeding_files/figure-html/Refined PCA analysis with"functional" groups and bacterivory-1.png' width="672" />

# Biplot of the `scale = TRUE` variant, for comparison with the plot above.
autoplot(
  NBP_all_bacterivory_functional_PCA_test,
  data = NBP_bacterivory_finaldf,
  label = TRUE,
  label.size = 2.5,
  loadings.label = TRUE,
  loadings.label.size = 3,
  loadings.label.colour = "black",
  colour = "Group",
  label.repel = TRUE
)
## Warning: ggrepel: 35 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

<img src='Ant_community_feeding_files/figure-html/Refined PCA analysis with"functional" groups and bacterivory-2.png' width="672" />

#VERY LITTLE DIFFERENCE

#Testing functional
# Note on naming: the "_functional" table is the one with the abundance
# columns (PNAN_mL:HNAN_mL, PNAN_mL_fp:HNAN_mL_fp, TNAN) removed, and the
# "_bacterivory" table is the one with MNAN_SS_perday:Total_SS removed.
NBP_bacterivory_finaldf_functional <- select(NBP_bacterivory_finaldf_forPCA_2, -(PNAN_mL:HNAN_mL), -(PNAN_mL_fp:HNAN_mL_fp), -TNAN)

NBP_bacterivory_finaldf_bacterivory <- select(NBP_bacterivory_finaldf_forPCA_2, -(MNAN_SS_perday:Total_SS), -(PNAN_mL_fp:HNAN_mL_fp))


# `scale.` is spelled out in full rather than relying on partial matching
# of `scale`; the resulting PCA is the same.
NBP_all_functional_PCA <- prcomp(NBP_bacterivory_finaldf_functional, center = TRUE, scale. = TRUE)

NBP_all_bacterivory_PCA <- prcomp(NBP_bacterivory_finaldf_bacterivory, center = TRUE, scale. = TRUE)

autoplot(NBP_all_functional_PCA, data = NBP_bacterivory_finaldf, label = TRUE, label.size = 2.5, loadings.label = TRUE, loadings.label.size = 3, loadings.label.colour = "black", colour = 'Group', label.repel = TRUE)
## Warning: ggrepel: 23 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

<img src='Ant_community_feeding_files/figure-html/Refined PCA analysis with"functional" groups and bacterivory-3.png' width="672" />

# Biplot of the PCA built from the table without MNAN_SS_perday:Total_SS.
autoplot(
  NBP_all_bacterivory_PCA,
  data = NBP_bacterivory_finaldf,
  label = TRUE,
  label.size = 2.5,
  loadings.label = TRUE,
  loadings.label.size = 3,
  loadings.label.colour = "black",
  colour = "Group",
  label.repel = TRUE
)
## Warning: ggrepel: 47 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

<img src='Ant_community_feeding_files/figure-html/Refined PCA analysis with"functional" groups and bacterivory-4.png' width="672" />

There seems to be a difference in groupings between all, functional, and bacterivory. In all, the 4 main groupings still shine through; however, there’s station Q, which is coupling with the Southern stations, and SOME overlap between Northern, Offshore, and Southern stations. Other still remains separate from the other stations.

When we remove the functional parameters, the groupings become much more distinct and station Q seems to lie between Northern and Southern close to Offshore sites. Other remains well separate from the other groupings.

When we remove the bacterivory variables, we see a PCA resembling a spread similar to the “all” plot. The 2 major differences are the increased overlap of the southern and offshore sites and station A groups with the rest of the Northern sites, which is expected.

Lastly, I want to test if I find similar results using the vegan package.

#Need to recalculate everything
# Same derived columns as before, but computed from the *_fp input columns.

NBP_bacterivory_finaldf_fp <- dplyr::mutate(
  NBP_bacterivory_df_noNA,
  # Total abundance and group shares from the _fp abundance columns.
  TNAN = PNAN_mL_fp + MNAN_mL_fp + HNAN_mL_fp,
  PNAN_percent = PNAN_mL_fp / TNAN * 100,
  MNAN_percent = MNAN_mL_fp / TNAN * 100,
  HNAN_percent = HNAN_mL_fp / TNAN * 100,
  # Bacteria per microsphere.
  bac_usphere_ratio = Bact_mL / usphere_mL,
  MNAN_bacterivory = usphere_mixo_fp / (Time_feeding / 60) * bac_usphere_ratio,
  HNAN_bacterivory = usphere_hetero_fp / (Time_feeding / 60) * bac_usphere_ratio,
  MNAN_SS = MNAN_bacterivory / Bact_mL * MNAN_mL_fp * 100,
  HNAN_SS = HNAN_bacterivory / Bact_mL * HNAN_mL_fp * 100,
  MNAN_SS_perday = MNAN_SS * 24,
  HNAN_SS_perday = HNAN_SS * 24,
  # Each group's share of the combined removal (0/0 gives NaN).
  Total_SS = MNAN_SS + HNAN_SS,
  MNAN_SS_percent = MNAN_SS / Total_SS * 100,
  HNAN_SS_percent = HNAN_SS / Total_SS * 100
)

#Now for new plots of everything
# Abundance boxplots from the _fp columns, by station and depth.

PNAN_plot_fp <- ggplot(
  NBP_bacterivory_finaldf_fp,
  aes(x = station, y = PNAN_mL_fp, fill = depth)
) +
  geom_boxplot()

MNAN_plot_fp <- ggplot(
  NBP_bacterivory_finaldf_fp,
  aes(x = station, y = MNAN_mL_fp, fill = depth)
) +
  geom_boxplot()

HNAN_plot_fp <- ggplot(
  NBP_bacterivory_finaldf_fp,
  aes(x = station, y = HNAN_mL_fp, fill = depth)
) +
  geom_boxplot()

grid.arrange(PNAN_plot_fp, MNAN_plot_fp, HNAN_plot_fp)

#SS per day

MNAN_SS_perday_plot_fp <- ggplot(
  NBP_bacterivory_finaldf_fp,
  aes(x = station, y = MNAN_SS_perday, fill = depth)
) +
  geom_boxplot()

HNAN_SS_perday_plot_fp <- ggplot(
  NBP_bacterivory_finaldf_fp,
  aes(x = station, y = HNAN_SS_perday, fill = depth)
) +
  geom_boxplot()

grid.arrange(MNAN_SS_perday_plot_fp, HNAN_SS_perday_plot_fp)

#PCA analysis
# Build the PCA input from the _fp table: drop X, the Rep:date, PAR0:depth and
# pprod_Sun:Pprod_PAR_20 ranges, plus the non-fp abundance and microsphere
# columns.
NBP_bacterivory_finaldf_forPCA_fp <- select(
  NBP_bacterivory_finaldf_fp,
  -X,
  -(Rep:date),
  -(PAR0:depth),
  -(pprod_Sun:Pprod_PAR_20),
  -(PNAN_mL:HNAN_mL),
  -(usphere_mL:usphere_hetero)
)

# Sample identifiers become the row names.
row.names(NBP_bacterivory_finaldf_forPCA_fp) <- NBP_bacterivory_finaldf_forPCA_fp[["sample"]]

# Remove the identifier column and the two *_SS_percent columns.
NBP_bacterivory_finaldf_forPCA_nosamplecol_fp <- select(
  NBP_bacterivory_finaldf_forPCA_fp,
  -sample,
  -(MNAN_SS_percent:HNAN_SS_percent)
)

NBP_bacterivory_pca_prcomp_fp <- prcomp(
  NBP_bacterivory_finaldf_forPCA_nosamplecol_fp,
  center = TRUE,
  scale. = TRUE
)

autoplot(
  NBP_bacterivory_pca_prcomp_fp,
  data = NBP_bacterivory_finaldf_fp,
  label = TRUE,
  label.size = 2.5,
  loadings.label = TRUE,
  loadings.label.size = 3,
  loadings.label.colour = "black",
  colour = "Group",
  label.repel = TRUE
)
## Warning: ggrepel: 3 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

#Testing removing feeding
# Reduced variable set for the _fp analysis.
NBP_bacterivory_finaldf_forPCA_2_fp <- select(NBP_bacterivory_finaldf_forPCA_fp, -(sample:Long),
                                             -(waterT2:conductivity2),
                                             -oxygen2,
                                             -PAR2,
                                             -salinity2,
                                           -Time_feeding,
                                           -(PNAN_percent:bac_usphere_ratio),
                                           -(MNAN_bacterivory:HNAN_SS))

# "_functional_fp": abundance columns, TNAN and the *_SS_percent columns removed.
NBP_bacterivory_finaldf_functional_fp <- select(NBP_bacterivory_finaldf_forPCA_2_fp, -(PNAN_mL_fp:HNAN_mL_fp), -TNAN, -(MNAN_SS_percent:HNAN_SS_percent))

# "_bacterivory_fp": MNAN_SS_perday:Total_SS and the *_SS_percent columns removed.
NBP_bacterivory_finaldf_bacterivory_fp <- select(NBP_bacterivory_finaldf_forPCA_2_fp, -(MNAN_SS_perday:Total_SS),-(MNAN_SS_percent:HNAN_SS_percent))

# `scale.` is spelled out in full rather than relying on partial matching
# of `scale`; the resulting PCA is the same.
NBP_all_functional_PCA_fp <- prcomp(NBP_bacterivory_finaldf_functional_fp, center = TRUE, scale. = TRUE)

NBP_all_bacterivory_PCA_fp <- prcomp(NBP_bacterivory_finaldf_bacterivory_fp, center = TRUE, scale. = TRUE)

# Plot against the _fp data frame the PCA input was derived from, matching
# the earlier _fp autoplot call.
autoplot(NBP_all_functional_PCA_fp, data = NBP_bacterivory_finaldf_fp, label = TRUE, label.size = 2.5, loadings.label = TRUE, loadings.label.size = 3, loadings.label.colour = "black", colour = 'Group', label.repel = TRUE)
## Warning: ggrepel: 22 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# Plot against the _fp data frame (the source of this PCA's input) rather
# than the non-fp NBP_bacterivory_finaldf.
autoplot(NBP_all_bacterivory_PCA_fp, data = NBP_bacterivory_finaldf_fp, label = TRUE, label.size = 2.5, loadings.label = TRUE, loadings.label.size = 3, loadings.label.colour = "black", colour = 'Group', label.repel = TRUE)
## Warning: ggrepel: 45 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Now I’m going to redo all of this…again with the final false positive method and fixing some typos/errors in the excel file.

#Old Groups can replace with next line at any time
#ant_community_final <- read.csv("/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/NBP1910_envdata_forFEED_final.csv")

#New Groups
# Load the final data set that carries the new Group assignments.
ant_community_final <- read.csv(
  file = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/NBP1910_envdata_forFEED_final_newgroup.csv"
)


# Show column names, types and leading values.
str(object = ant_community_final)
## 'data.frame':    87 obs. of  66 variables:
##  $ X                     : chr  "A_1" "A_2" "A_3" "A_4" ...
##  $ sample                : chr  "A_Surf_1" "A_Surf_2" "A_Surf_3" "A_DCM_1" ...
##  $ Rep                   : chr  "A" "B" "C" "A" ...
##  $ Group                 : chr  "Gerlache" "Gerlache" "Gerlache" "Gerlache" ...
##  $ date                  : chr  "6-Nov" "6-Nov" "6-Nov" "6-Nov" ...
##  $ Lat_deg               : int  64 64 64 64 64 64 64 64 64 64 ...
##  $ Lat_min               : num  32.4 32.4 32.4 32.4 32.4 32.4 53.7 53.7 53.7 53.7 ...
##  $ Lat                   : num  64.5 64.5 64.5 64.5 64.5 ...
##  $ Long_deg              : int  62 62 62 62 62 62 64 64 64 64 ...
##  $ Long_min              : num  22.4 22.4 22.4 22.4 22.4 22.4 12.1 12.1 12.1 12.1 ...
##  $ Long                  : num  62.4 62.4 62.4 62.4 62.4 ...
##  $ bottom                : int  740 740 740 740 740 740 799 799 799 799 ...
##  $ ice                   : num  0 0 0 0 0 0 1 1 1 1 ...
##  $ airT                  : num  -0.5 -0.5 -0.5 -0.5 -0.5 -0.5 -1.4 -1.4 -1.4 -1.4 ...
##  $ PAR0                  : chr  "nd" "nd" "nd" "nd" ...
##  $ station               : chr  "A" "A" "A" "A" ...
##  $ depth                 : chr  "Surf" "Surf" "Surf" "DCM" ...
##  $ depth_m               : num  1.4 1.4 1.4 45.2 45.2 45.2 1 1 1 30.2 ...
##  $ Chla                  : num  0.5 0.5 0.5 0.52 0.52 0.52 0.43 0.43 0.43 0.22 ...
##  $ prDM                  : int  1 1 1 46 46 46 1 1 1 30 ...
##  $ depSM                 : num  0.991 0.991 0.991 45.545 45.545 ...
##  $ ZML_T_m..0.2ºC.       : num  43.6 43.6 43.6 43.6 43.6 ...
##  $ ZML_TS_m..0.023Kg.m3. : num  24.8 24.8 24.8 24.8 24.8 ...
##  $ ZML_TSP_m..0.023Kg.m3.: num  13.9 13.9 13.9 13.9 13.9 ...
##  $ Ze_m..1..             : num  49.5 49.5 49.5 49.5 49.5 ...
##  $ ZCM_m..maxFluo.       : num  30.7 30.7 30.7 30.7 30.7 ...
##  $ waterT1               : num  -0.631 -0.631 -0.631 -0.403 -0.403 ...
##  $ conductivity1         : num  27.8 27.8 27.8 28.1 28.1 ...
##  $ waterT2               : num  -0.625 -0.625 -0.625 -0.401 -0.401 ...
##  $ conductivity2         : num  27.8 27.8 27.8 28.1 28.1 ...
##  $ oxygen1               : num  7.01 7.01 7.01 6.23 6.23 ...
##  $ oxygen2               : num  7.08 7.08 7.08 6.32 6.32 ...
##  $ fluorescence          : num  0.151 0.151 0.151 0.739 0.739 ...
##  $ beamTrans             : num  97.9 97.9 97.9 98.6 98.6 ...
##  $ PAR1                  : num  965.3 965.3 965.3 13.9 13.9 ...
##  $ PAR2                  : num  2895.8 2895.8 2895.8 41.8 41.8 ...
##  $ latitude              : num  -64.5 -64.5 -64.5 -64.5 -64.5 ...
##  $ longitude             : num  -62.4 -62.4 -62.4 -62.4 -62.4 ...
##  $ timeJ                 : num  310 310 310 310 310 ...
##  $ altM                  : num  100.9 100.9 100.9 99.5 99.5 ...
##  $ spar                  : num  920 920 920 1085 1085 ...
##  $ timeS                 : num  1088 1088 1088 835 835 ...
##  $ scan                  : int  26122 26122 26122 20045 20045 20045 58590 58590 58590 55872 ...
##  $ salinity1             : num  34 34 34 34.2 34.2 ...
##  $ salinity2             : num  34 34 34 34.2 34.2 ...
##  $ oxygenSaturation      : num  8.2 8.2 8.2 8.14 8.14 ...
##  $ nbin                  : int  72 72 72 65 65 65 38 38 38 41 ...
##  $ bprod                 : num  4.07 4.07 4.07 3.64 3.64 3.64 7.85 7.85 7.85 1.68 ...
##  $ pprod_Sun             : chr  "nd" "nd" "nd" "nd" ...
##  $ pprod_PAR             : chr  "nd" "nd" "nd" "nd" ...
##  $ pprod_Sun_20          : chr  "nd" "nd" "nd" "nd" ...
##  $ Pprod_PAR_20          : chr  "nd" "nd" "nd" "nd" ...
##  $ NH4                   : num  1.38 1.38 1.38 1.27 1.27 1.27 1.48 1.48 1.48 1.35 ...
##  $ NO2_NO3               : num  30 30 30 30.9 30.9 ...
##  $ PO4                   : num  3.03 3.03 3.03 3.29 3.29 3.29 3.03 3.03 3.03 3.07 ...
##  $ Total_DIN             : num  31.4 31.4 31.4 32.1 32.1 ...
##  $ N_P_Ratio             : num  10.35 10.35 10.35 9.77 9.77 ...
##  $ Time_feeding          : int  30 30 30 30 30 30 30 30 30 30 ...
##  $ ANAN_mL               : num  1089 1415 586 1452 1551 ...
##  $ PNAN_mL               : num  1126 1433 614 1620 1582 ...
##  $ MNAN_mL               : num  23.27 4.65 13.96 130.32 0 ...
##  $ HNAN_mL               : num  382 316 140 261 155 ...
##  $ Bact_mL               : num  136415 136415 136415 158414 158414 ...
##  $ usphere_mL            : num  20205 20205 20205 20781 20781 ...
##  $ usphere_mixo          : num  0.665 0 0 0.604 0.427 ...
##  $ usphere_hetero        : num  0.0887 0 0 0.1294 0.0854 ...
# Convert the grouping columns of the final data set to factors.
# The factors are built from ant_community_final's own columns. They were
# previously copied from the older `ant_community` data frame, which is only
# correct while both files have identical rows in identical order.
ant_community_final$Rep <- as.factor(ant_community_final$Rep)
ant_community_final$station <- as.factor(ant_community_final$station)
# Explicit level order so depth sorts Surf, DCM, Bucket.
ant_community_final$depth <- factor(ant_community_final$depth, levels = c("Surf", "DCM", "Bucket"))
# NOTE(review): values not listed in `levels` become NA; "Maguerite" may be a
# misspelling of "Marguerite" - confirm it matches the spelling in the CSV.
ant_community_final$Group <- factor(ant_community_final$Group, levels = c("Gerlache", "Palmer", "Grandidier","Offshore","Maguerite"))

All of the calculations that follow will be done on a per mL basis.

# Drop replicate C of station A at the DCM depth.
ant_community_final <- filter(
  ant_community_final,
  Rep != "C" | station != "A" | depth != "DCM"
)

# Derived columns for the final data set. Here TNAN is built from ANAN, MNAN
# and HNAN, and an ANAN share is added. Later columns reuse earlier ones.
ant_community_final_calc <- dplyr::mutate(
  ant_community_final,
  TNAN = ANAN_mL + MNAN_mL + HNAN_mL,
  PNAN_percent = PNAN_mL / TNAN * 100,
  MNAN_percent = MNAN_mL / TNAN * 100,
  HNAN_percent = HNAN_mL / TNAN * 100,
  ANAN_percent = ANAN_mL / TNAN * 100,
  # Bacteria per microsphere.
  bac_usphere_ratio = Bact_mL / usphere_mL,
  MNAN_bacterivory = usphere_mixo / (Time_feeding / 60) * bac_usphere_ratio,
  HNAN_bacterivory = usphere_hetero / (Time_feeding / 60) * bac_usphere_ratio,
  MNAN_SS = MNAN_bacterivory / Bact_mL * MNAN_mL * 100,
  HNAN_SS = HNAN_bacterivory / Bact_mL * HNAN_mL * 100,
  MNAN_SS_perday = MNAN_SS * 24,
  HNAN_SS_perday = HNAN_SS * 24,
  # Each group's share of the combined removal (0/0 gives NaN).
  Total_SS = MNAN_SS + HNAN_SS,
  MNAN_SS_percent = MNAN_SS / Total_SS * 100,
  HNAN_SS_percent = HNAN_SS / Total_SS * 100
)
#Abundance boxPlots
# All four panels use the same subset: stations N, O, P and Q and the
# "Bucket" depth are excluded.
abundance_box_data <- filter(
  ant_community_final_calc,
  !station %in% c("N", "O","P","Q"),
  depth != "Bucket"
)

PNAN_plot_final <- ggplot(abundance_box_data, aes(x = station, y = PNAN_mL, fill = depth)) +
  geom_boxplot() +
  ylab(expression("PNAN"~ "ml"^-1)) +
  labs(fill = "Depth") +
  xlab("Station")

MNAN_plot_final <- ggplot(abundance_box_data, aes(x = station, y = MNAN_mL, fill = depth)) +
  geom_boxplot() +
  ylab(expression("MNAN"~ "ml"^-1)) +
  labs(fill = "Depth") +
  xlab("Station")

HNAN_plot_final <- ggplot(abundance_box_data, aes(x = station, y = HNAN_mL, fill = depth)) +
  geom_boxplot() +
  ylab(expression("HNAN"~ "ml"^-1)) +
  labs(fill = "Depth") +
  xlab("Station")

ANAN_plot_final <- ggplot(abundance_box_data, aes(x = station, y = ANAN_mL, fill = depth)) +
  geom_boxplot() +
  ylab(expression("ANAN"~ "ml"^-1)) +
  labs(fill = "Depth") +
  xlab("Station")

grid.arrange(PNAN_plot_final, MNAN_plot_final, HNAN_plot_final, ANAN_plot_final)

# NOTE(review): four labels are supplied but only three plots are arranged
# (ANAN_plot_final is not included) - confirm which is intended.
Abundance_ggarrange <- ggarrange(
  PNAN_plot_final,
  MNAN_plot_final,
  HNAN_plot_final,
  common.legend = TRUE,
  legend = "bottom",
  labels = c("A", "B","C","D")
)

Abundance_ggarrange

#Abundance barplots
# Dodged bar charts on the same station/depth subset as the boxplots
# (stations N, O, P, Q and the "Bucket" depth excluded).
abundance_bar_data <- filter(
  ant_community_final_calc,
  !station %in% c("N", "O","P","Q"),
  depth != "Bucket"
)

PNAN_plot_final_bar <- ggplot(abundance_bar_data) +
  geom_col(aes(x = station, y = PNAN_mL, fill = depth), position = position_dodge()) +
  ylab(expression("PNAN"~ "ml"^-1)) +
  labs(fill = "Depth") +
  xlab("") +
  scale_fill_manual(values = c("blue3","green4")) +
  theme_bw()

MNAN_plot_final_bar <- ggplot(abundance_bar_data) +
  geom_col(aes(x = station, y = MNAN_mL, fill = depth), position = position_dodge()) +
  ylab(expression("MNAN"~ "ml"^-1)) +
  labs(fill = "Depth") +
  xlab("") +
  scale_fill_manual(values = c("blue3","green4")) +
  theme_bw()

HNAN_plot_final_bar <- ggplot(abundance_bar_data) +
  geom_col(aes(x = station, y = HNAN_mL, fill = depth), position = position_dodge()) +
  ylab(expression("HNAN"~ "ml"^-1)) +
  labs(fill = "Depth") +
  xlab("Station") +
  scale_fill_manual(values = c("blue3","green4")) +
  theme_bw()

Bact_abund_final_bar <- ggplot(abundance_bar_data) +
  geom_col(aes(x = station, y = Bact_mL, fill = depth), position = position_dodge()) +
  ylab(expression("Bacteria"~ "ml"^-1)) +
  labs(fill = "Depth") +
  xlab("Station") +
  scale_fill_manual(values = c("blue3","green4")) +
  theme_bw()

# 2 x 2 grid of the four bar charts with the legend suppressed.
Abundance_ggarrange_bar <- ggarrange(
  PNAN_plot_final_bar,
  MNAN_plot_final_bar,
  HNAN_plot_final_bar,
  Bact_abund_final_bar,
  common.legend = TRUE,
  legend = "none",
  ncol = 2,
  nrow = 2,
  label.x = 0.95,
  label.y = 1.15,
  hjust = -0.1
)

Abundance_ggarrange_bar

# Write the arranged figure as PNG into the manuscript figure directory.
ggsave(
  filename = "ALL_abundance_BARplot.png",
  plot = Abundance_ggarrange_bar,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
# Write the same arranged figure as PDF into the manuscript figure directory.
ggsave(
  filename = "ALL_abundance_BARplot.pdf",
  plot = Abundance_ggarrange_bar,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
#bacterivory plots
# Subset used by every panel except the "_all" one: stations N, O, P, Q and
# the "Bucket" depth are excluded.
bacterivory_plot_data <- filter(
  ant_community_final_calc,
  !station %in% c("N", "O","P","Q"),
  depth != "Bucket"
)

MNAN_SS_perday_plot_final <- ggplot(bacterivory_plot_data) +
  geom_boxplot(aes(x = station, y = MNAN_SS_perday, fill = depth)) +
  ylab("MNAN"~"bacterivory"~"day"^-1) +
  labs(fill = "Depth") +
  xlab("Station")

# Variant without axis titles and with fixed fill colours.
MNAN_SS_perday_plot_final1 <- ggplot(bacterivory_plot_data) +
  geom_boxplot(aes(x = station, y = MNAN_SS_perday, fill = depth)) +
  ylab("") +
  labs(fill = "Depth") +
  xlab("") +
  scale_fill_manual(values = c("blue3","green4")) +
  theme_bw()

# Unfiltered version: every station and depth.
MNAN_SS_perday_plot_final_all <- ggplot(ant_community_final_calc) +
  geom_boxplot(aes(x = station, y = MNAN_SS_perday, fill = depth)) +
  ylab("MNAN"~"bacterivory"~"day"^-1)

HNAN_SS_perday_plot_final <- ggplot(bacterivory_plot_data) +
  geom_boxplot(aes(x = station, y = HNAN_SS_perday, fill = depth)) +
  ylab("HNAN"~"bacterivory"~"day"^-1) +
  labs(fill = "Depth") +
  xlab("Station")

# Variant without axis titles and with fixed fill colours.
HNAN_SS_perday_plot_final1 <- ggplot(bacterivory_plot_data) +
  geom_boxplot(aes(x = station, y = HNAN_SS_perday, fill = depth)) +
  ylab("") +
  labs(fill = "Depth") +
  xlab("") +
  scale_fill_manual(values = c("blue3","green4")) +
  theme_bw()

grid.arrange(MNAN_SS_perday_plot_final, HNAN_SS_perday_plot_final)

#can do both of these metrics as percents for a nice scalar but it doesn't matter for now.
#Bacterivory percent plots
MNAN_SS_percent_perday_plot_final <- ggplot(filter(ant_community_final_calc, !station %in% c("N", "O","P","Q"), !depth == "Bucket"))+
  geom_boxplot(aes(x = station, y = MNAN_SS_percent, fill = depth))+ ylab(str_wrap("MNAN Percent of Bacterivory", 20))

MNAN_SS_percent_perday_plot_final_all <- ggplot(ant_community_final_calc)+
  geom_boxplot(aes(x = station, y = MNAN_SS_percent, fill = depth))+ ylab(str_wrap("MNAN Percent of Bacterivory", 20))

HNAN_SS_percent_perday_plot_final <- ggplot(filter(ant_community_final_calc, !station %in% c("N", "O","P","Q"), !depth == "Bucket"))+
  geom_boxplot(aes(x = station, y = HNAN_SS_percent, fill = depth))+ ylab(str_wrap("HNAN Percent of Bacterivory", 20))

#Publication ready versions of plots
# Stack the labelled MNAN and HNAN per-day bacterivory boxplots.
grid.arrange(MNAN_SS_perday_plot_final,HNAN_SS_perday_plot_final)

# Three panels (PNAN, MNAN, HNAN plots defined earlier in the file) in one
# column, sharing a single legend placed at the bottom.
ggarrange(PNAN_plot_final, MNAN_plot_final, HNAN_plot_final, ncol = 1, common.legend = TRUE, legend = "bottom")

# Per-day bacterivory: HNAN on top, MNAN below, shared legend at the bottom.
ggarrange(HNAN_SS_perday_plot_final,MNAN_SS_perday_plot_final, ncol = 1, common.legend = TRUE, legend = "bottom")

# Same layout built from the blank-axis-title "final1" panels, tagged A/B and
# with the legend suppressed, so it can be annotated and combined later.
Bacterivory_ggarrange <- ggarrange(HNAN_SS_perday_plot_final1, MNAN_SS_perday_plot_final1, ncol = 1, common.legend = TRUE, legend = "none", labels = c("A", "B"))

# Add one shared, rotated y-axis title on the left of the two-panel figure
# (the individual panels have empty axis titles).
Bacterivory_ggarrange1 <- annotate_figure(Bacterivory_ggarrange, left = text_grob("Standing Stock Removed (% day"^-1~")", rot = 90, size = 10))

# Percent-of-bacterivory panels: MNAN on top, HNAN below, shared bottom legend.
ggarrange(MNAN_SS_percent_perday_plot_final,HNAN_SS_percent_perday_plot_final, ncol = 1, common.legend = TRUE, legend = "bottom")
## Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
## Removed 2 rows containing non-finite values (`stat_boxplot()`).
## Removed 2 rows containing non-finite values (`stat_boxplot()`).

#Testing if I can ggarrange two already arranged plots
#ggarrange(Bacterivory_ggarrange, Abundance_ggarrange, common.legend = TRUE)
#This works

# Percent of each nanoplankton pool that is mixotrophic (MNAN), per sample.
# NOTE: these still use every row of ant_community_final_calc. Stations N and
# beyond were removed for the analysis, so they should be removed here as well
# (see the commented-out grouped / station-filtered version below).
#
# PNAN is the sum of ANAN and MNAN, so the mixotrophic share of PNAN must be
# divided by ANAN + MNAN, mirroring the HNAN + MNAN denominator used for the
# bacterivore pool. Dividing by ANAN alone (the earlier form) gives a
# MNAN:ANAN ratio that can exceed 100 and is Inf where ANAN is 0.
# NOTE(review): assumes ANAN_mL excludes mixotrophs - confirm against the
# column definitions.
PNAN_mixo_percent <- ant_community_final_calc$MNAN_mL /
  (ant_community_final_calc$ANAN_mL + ant_community_final_calc$MNAN_mL) * 100
HNAN_mixo_percent <- ant_community_final_calc$MNAN_mL /
  (ant_community_final_calc$HNAN_mL + ant_community_final_calc$MNAN_mL) * 100

#mixo PNAN% 0-14% (recorded with the earlier MNAN/ANAN formula; re-run range(PNAN_mixo_percent, na.rm = TRUE) to refresh)
#mixo HNAN% 0-61%

#grouped and removed station data
#PNAN_mixo_percent_grp <- ant_community_final_calc_grp_remStations$MNAN_mL/ant_community_final_calc_grp_remStations$ANAN_mL*100
#HNAN_mixo_percent_grp <- ant_community_final_calc_grp_remStations$MNAN_mL/(ant_community_final_calc_grp_remStations$HNAN_mL+ant_community_final_calc_grp_remStations$MNAN_mL)*100


#mixo PNAN% 0-11%
#mixo HNAN% 0-42%
#mixo bacter% 0-30%

# Build a one-row summary table of the mixotrophy ranges noted above.

# One string per column; the values are the hand-recorded ranges from the
# grouped, station-filtered data (stray leading space in the last entry
# removed so all cells are formatted alike).
mixo_percent_character_vector <- c("0 - 11", "0 - 42", "0 - 30")
# Transpose so the three ranges become three columns of a single row.
mixo_percent_df <- t(as.data.frame(mixo_percent_character_vector))
rownames(mixo_percent_df) <- NULL
# Column headers, wrapped to narrow lines so the table stays compact.
# (Closing parenthesis added to the HNAN header.)
mixo_percent_colnames <- str_wrap(
  c(
    "Phototrophic Nanoplankton PNAN (% mixotrophic)",
    "Bacterivorous Nanoplankton HNAN (% mixotrophic)",
    "Bacterial Standing Stock Removed by Mixotrophic Nanoplankton (% per day)"
  ),
  width = 10
)
# Horizontal rules under the header row and the data row. The identical
# tab_add_hline() call used to be applied twice, which only redrew the same
# lines, so it is applied once here.
ggtexttable(
  mixo_percent_df,
  cols = mixo_percent_colnames,
  theme = ttheme(base_style = "minimal", base_size = 15)
) %>%
  tab_add_hline(at.row = 1:2, row.side = "bottom", linewidth = 2)

#devtools::install_github("davidgohel/flextable")
library(flextable)
# Alternative rendering of the same table with flextable.
mixo_percent_df1 <- as.data.frame(mixo_percent_df)
flextable(mixo_percent_df1)
# The PCA functions label sites by row name, so use the sample ID column.
rownames(ant_community_final_calc) <- ant_community_final_calc$X

# Response tables: functional-group abundances and per-day bacterivory rates.
ant_community_final_functional <- ant_community_final_calc %>%
  select(ANAN_mL:HNAN_mL)
ant_community_final_calc_bacterivory <- ant_community_final_calc %>%
  select(MNAN_SS_perday:HNAN_SS_perday)

# Explanatory table: everything left after dropping the listed column ranges
# (identifiers, duplicate sensor channels, and the response columns).
ant_community_final_calc_env <- ant_community_final_calc %>%
  select(
    -(X:Lat_min),
    -(Long_deg:Long_min),
    -(PAR0:depth),
    -(waterT2:conductivity2),
    -oxygen2,
    -(PAR2:longitude),
    -salinity2,
    -(pprod_Sun:HNAN_mL),
    -(usphere_mL:HNAN_SS_percent)
  )

# Currently built from exactly the same column selection as the table above.
ant_community_final_calc_env_trimmed <- ant_community_final_calc_env


#load in ggvegan from github

#devtools::install_github("gavinsimpson/ggvegan")

# PCA of the functional-group abundances and of the bacterivory rates on all
# replicate samples, first with vegan::rda() (unconstrained, scaled columns).
functional_pca <- rda(ant_community_final_functional, scale = TRUE)
bacterivory_pca <- rda(ant_community_final_calc_bacterivory, scale = TRUE)

# The same ordinations with stats::prcomp(), centred and scaled to unit
# variance. `scale.` is the argument's real name; the former `scale = TRUE`
# only worked through partial argument matching.
functional_prcomp <- prcomp(ant_community_final_functional, center = TRUE, scale. = TRUE)
bacterivory_prcomp <- prcomp(ant_community_final_calc_bacterivory, center = TRUE, scale. = TRUE)

# Biplots: samples labelled by row name and coloured by Group, with labelled
# loadings.
autoplot(
  functional_prcomp,
  data = ant_community_final_calc,
  label = TRUE, label.size = 2.5,
  loadings.label = TRUE, loadings.label.size = 3,
  loadings.label.colour = "black",
  colour = "Group",
  label.repel = TRUE
)

autoplot(
  bacterivory_prcomp,
  data = ant_community_final_calc,
  label = TRUE, label.size = 2.5,
  loadings.label = TRUE, loadings.label.size = 3,
  loadings.label.colour = "black",
  colour = "Group",
  label.repel = TRUE
)
## Warning: ggrepel: 26 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

#biplot(functional_pca)
#autoplot(functional_pca)
#autoplot(bacterivory_pca)

# Fit every environmental variable onto each PCA. permutations = 0 skips the
# permutation test, so only the fitted vectors/centroids and r2 are produced.
# The argument is spelled out; the former `perm = 0` relied on partial matching.
functional_pca_fit <- envfit(functional_prcomp ~ ., ant_community_final_calc_env, permutations = 0)
#autoplot(functional_pca_fit, geom = "label_repel")

bacterivory_pca_fit <- envfit(bacterivory_prcomp ~ ., ant_community_final_calc_env, permutations = 0)
#autoplot(bacterivory_pca_fit, geom = "label_repel")

#biplot(functional_prcomp, functional_pca_fit)

Even though the raw data are noisy, I want to look at the raw station replicates with the later stations removed to see whether that changes the variation in the dataset.

# Replicate-level data with stations N-Q removed.
ant_community_final_calc_STrem <- ant_community_final_calc %>%
  filter(!station %in% c("N", "O", "P", "Q"))

# Response and explanatory tables, selected exactly as for the full dataset.
ant_community_final_functional_STrem <- ant_community_final_calc_STrem %>%
  select(ANAN_mL:HNAN_mL)
ant_community_final_calc_bacterivory_STrem <- ant_community_final_calc_STrem %>%
  select(MNAN_SS_perday:HNAN_SS_perday)
ant_community_final_calc_env_STrem <- ant_community_final_calc_STrem %>%
  select(
    -(X:Lat_min),
    -(Long_deg:Long_min),
    -(PAR0:depth),
    -(waterT2:conductivity2),
    -oxygen2,
    -(PAR2:longitude),
    -salinity2,
    -(pprod_Sun:HNAN_mL),
    -(usphere_mL:HNAN_SS_percent)
  )

# Centred, unit-variance PCAs. `scale.` is prcomp()'s actual argument name;
# `scale = TRUE` only worked through partial argument matching.
functional_prcomp_STrem <- prcomp(ant_community_final_functional_STrem, center = TRUE, scale. = TRUE)
bacterivory_prcomp_STrem <- prcomp(ant_community_final_calc_bacterivory_STrem, center = TRUE, scale. = TRUE)

# Biplots coloured by Group, samples labelled by row name.
autoplot(
  functional_prcomp_STrem,
  data = ant_community_final_calc_STrem,
  label = TRUE, label.size = 2.5,
  loadings.label = TRUE, loadings.label.size = 3,
  loadings.label.colour = "black",
  colour = "Group",
  label.repel = TRUE
)

autoplot(
  bacterivory_prcomp_STrem,
  data = ant_community_final_calc_STrem,
  label = TRUE, label.size = 2.5,
  loadings.label = TRUE, loadings.label.size = 3,
  loadings.label.colour = "black",
  colour = "Group",
  label.repel = TRUE
)

#May still need to separate out PP from bacterivory depending on how messy the data is
#The trend shows pretty nicely compared to the whole dataset. I will repeat this while separating out the PP and hetero abundances

# Abundance subsets: phototrophs (ANAN + MNAN) and bacterivores (the MNAN_mL
# to HNAN_mL column range).
ant_community_final_functional_STrem_PP <- select(ant_community_final_calc_STrem, ANAN_mL, MNAN_mL)
ant_community_final_functional_STrem_hetero <- select(ant_community_final_calc_STrem, MNAN_mL:HNAN_mL)

# NOTE: despite its name, bacterivory_prcomp_STrem_hetero is a PCA of the
# bacterivore *abundances*, not of the bacterivory rates.
# `scale.` is spelled out; `scale = TRUE` relied on partial argument matching.
functional_prcomp_STrem_PP <- prcomp(ant_community_final_functional_STrem_PP, center = TRUE, scale. = TRUE)
bacterivory_prcomp_STrem_hetero <- prcomp(ant_community_final_functional_STrem_hetero, center = TRUE, scale. = TRUE)

autoplot(
  functional_prcomp_STrem_PP,
  data = ant_community_final_calc_STrem,
  label = TRUE, label.size = 2.5,
  loadings.label = TRUE, loadings.label.size = 3,
  loadings.label.colour = "black",
  colour = "Group",
  label.repel = TRUE
)

autoplot(
  bacterivory_prcomp_STrem_hetero,
  data = ant_community_final_calc_STrem,
  label = TRUE, label.size = 2.5,
  loadings.label = TRUE, loadings.label.size = 3,
  loadings.label.colour = "black",
  colour = "Group",
  label.repel = TRUE
)

The intra-site variability seems a little high using a straight Euclidean distance.

# Average the replicates: one row per Group x station x depth holding the mean
# (NAs ignored) of every numeric column. The result stays grouped by Group and
# station, which is why the select() calls below re-attach those two columns
# (see the "Adding missing grouping variables" messages).
ant_community_final_calc_grp <- ant_community_final_calc %>%
  group_by(Group, station, depth) %>%
  summarise_if(is.numeric, mean, na.rm = TRUE)

ant_community_final_functional_grp <- ant_community_final_calc_grp %>%
  select(ANAN_mL:HNAN_mL)
## Adding missing grouping variables: `Group`, `station`
# `-station` does not remove the column here: as a grouping variable it is
# added back, as the message below shows.
ant_community_final_calc_bacterivory_grp <- ant_community_final_calc_grp %>%
  select(MNAN_SS_perday:HNAN_SS_perday, -station)
## Adding missing grouping variables: `Group`, `station`
ant_community_final_calc_env_grp <- ant_community_final_calc_grp %>%
  select(
    -(Long_deg:Long),
    -(waterT2:conductivity2),
    -oxygen2,
    -PAR2,
    -salinity2,
    -(Time_feeding:HNAN_mL),
    -(usphere_mL:HNAN_SS_percent)
  )

# Drop the two re-attached grouping columns (positions 1-2) by position.
# NOTE(review): the 3:31 range depends on the exact column count and order of
# the env table - confirm if columns are added or removed upstream.
ant_community_final_functional_grp_fixed <- ant_community_final_functional_grp[, 3:6]
ant_community_final_calc_bacterivory_grp_fixed <- ant_community_final_calc_bacterivory_grp[, 3:4]
ant_community_final_calc_env_grp_fixed <- ant_community_final_calc_env_grp[, 3:31]



# Row names for the grouped (station x depth mean) tables: "<station>_<depth>".
grouped_rownames <- paste(ant_community_final_calc_grp$station, ant_community_final_calc_grp$depth, sep = "_")
rownames(ant_community_final_functional_grp_fixed) <- grouped_rownames
## Warning: Setting row names on a tibble is deprecated.
rownames(ant_community_final_calc_bacterivory_grp_fixed) <- grouped_rownames
## Warning: Setting row names on a tibble is deprecated.
rownames(ant_community_final_calc_env_grp_fixed) <- grouped_rownames
## Warning: Setting row names on a tibble is deprecated.
# Centred, unit-variance PCAs of the grouped means. `scale.` is prcomp()'s
# actual argument name; `scale = TRUE` only worked through partial matching.
functional_prcomp_grp <- prcomp(as.data.frame(ant_community_final_functional_grp_fixed), center = TRUE, scale. = TRUE)
bacterivory_prcomp_grp <- prcomp(ant_community_final_calc_bacterivory_grp_fixed, center = TRUE, scale. = TRUE)
# The full grouped table is passed to autoplot() as `data`, so it needs the
# same row names.
rownames(ant_community_final_calc_grp) <- grouped_rownames
## Warning: Setting row names on a tibble is deprecated.
#functional plot w/all data
autoplot(
  functional_prcomp_grp,
  data = as.data.frame(ant_community_final_calc_grp),
  label = TRUE, label.size = 2.5,
  loadings.label = TRUE, loadings.label.size = 3,
  loadings.label.colour = "black",
  colour = "depth",
  label.repel = TRUE
)

#bacterivory plot w/all data
autoplot(
  bacterivory_prcomp_grp,
  data = as.data.frame(ant_community_final_calc_grp),
  label = TRUE, label.size = 2.5,
  loadings.label = TRUE, loadings.label.size = 3,
  loadings.label.colour = "black",
  colour = "depth",
  label.repel = TRUE
)

# Functional plot with only the bacterivore columns (MNAN_mL to HNAN_mL range)
ant_community_final_functional_grp_fixed_M_HNAN_only <- select(ant_community_final_functional_grp_fixed, (MNAN_mL:HNAN_mL))

# select() returned a new tibble, so the row names are set again.
rownames(ant_community_final_functional_grp_fixed_M_HNAN_only) <- grouped_rownames
## Warning: Setting row names on a tibble is deprecated.
# `scale.` is spelled out; `scale = TRUE` relied on partial argument matching.
functional_prcomp_grp_M_HNAN <- prcomp(ant_community_final_functional_grp_fixed_M_HNAN_only, center = TRUE, scale. = TRUE)

autoplot(
  functional_prcomp_grp_M_HNAN,
  data = as.data.frame(ant_community_final_calc_grp),
  label = TRUE, label.size = 2.5,
  loadings.label = TRUE, loadings.label.size = 3,
  loadings.label.colour = "black",
  colour = "depth",
  label.repel = TRUE
)

#Functional plot w/MNAN and ANAN (NOT PNAN since it's the addition of MNAN and ANAN)
ant_community_final_functional_grp_fixed_M_PNAN_only <- select(ant_community_final_functional_grp_fixed, ANAN_mL, MNAN_mL)

rownames(ant_community_final_functional_grp_fixed_M_PNAN_only) <- grouped_rownames
## Warning: Setting row names on a tibble is deprecated.
functional_prcomp_grp_M_PNAN <- prcomp(ant_community_final_functional_grp_fixed_M_PNAN_only, center = TRUE, scale. = TRUE)

autoplot(
  functional_prcomp_grp_M_PNAN,
  data = as.data.frame(ant_community_final_calc_grp),
  label = TRUE, label.size = 2.5,
  loadings.label = TRUE, loadings.label.size = 3,
  loadings.label.colour = "black",
  colour = "depth",
  label.repel = TRUE
)

Something I have learned is that tibbles do not keep row names after filtering, so row names must be re-added after every filtering step. To keep them, store them in a column before filtering and restore them afterwards.

Because this adds a lot of extra coding I’m making a separate coding chunk for this analysis.

#Grouped dataset first because it is cleaner
# Grouped means with stations N-Q and the "Bucket" depth removed.
ant_community_final_calc_grp_remStations <- filter(
  ant_community_final_calc_grp,
  !station %in% c("N", "O", "P", "Q"),
  depth != "Bucket"
)

# Rename columns whose raw names look bad in the analysis output.
# The mapping is old name -> new name, applied BY NAME. The previous version
# assigned the new names positionally, in whatever order the columns happened
# to appear, which mislabels variables (or errors) if the column order changes
# or a column is missing.
colname_map <- c(
  "waterT1" = "Water_Temp",
  "conductivity1" = "conductivity",
  "oxygen1" = "oxygen",
  "PAR1" = "PAR",
  "salinity1" = "salinity",
  "oxygenSaturation" = "oxygen_saturation",
  "ZML_TS_m..0.023Kg.m3." = "ZML",
  "timeJ" = "Time",
  "Ze_m..1.." = "ZE"
)
# Kept for backward compatibility: the vector of raw names to be replaced.
colname_fix <- names(colname_map)

current_colnames <- colnames(ant_community_final_calc_grp_remStations)
needs_rename <- current_colnames %in% colname_fix
colnames(ant_community_final_calc_grp_remStations)[needs_rename] <-
  unname(colname_map[current_colnames[needs_rename]])

# Response and explanatory tables for the station-filtered grouped means.
# The input is still grouped by Group and station, so select() re-attaches
# those two columns (see the messages below); `-station` therefore has no
# effect on the bacterivory table.
ant_community_final_functional_grp_remST <- ant_community_final_calc_grp_remStations %>%
  select(ANAN_mL:HNAN_mL)
## Adding missing grouping variables: `Group`, `station`
ant_community_final_calc_bacterivory_grp_remST <- ant_community_final_calc_grp_remStations %>%
  select(MNAN_SS_perday:HNAN_SS_perday, -station)
## Adding missing grouping variables: `Group`, `station`
ant_community_final_calc_env_grp_remST <- ant_community_final_calc_grp_remStations %>%
  select(
    -(Lat_deg:Long),
    -(waterT2:conductivity2),
    -oxygen2,
    -PAR2,
    -salinity2,
    -(Time_feeding:HNAN_mL),
    -(usphere_mL:HNAN_SS_percent)
  )

# Drop the two re-attached grouping columns (positions 1-2) by position.
# NOTE(review): 3:31 depends on the exact column count/order of the env table.
ant_community_final_functional_grp_fixed_remST <- ant_community_final_functional_grp_remST[, 3:6]
ant_community_final_calc_bacterivory_grp_fixed_remST <- ant_community_final_calc_bacterivory_grp_remST[, 3:4]
ant_community_final_calc_env_grp_fixed_remST <- ant_community_final_calc_env_grp_remST[, 3:31]
#This subset doesn't include the Nutrient data
#This subset doesn't include the Nutrient data


# Row names for the station-filtered grouped tables.
#grouped_rownames_remST <- ant_community_final_calc_grp_remStations$station
# Cleaner labels of the form "<station>_<n>", where n counts the rows of that
# station in table order (e.g. "A_1", "A_2", "C_1", ...). They are derived from
# the station column instead of being typed out, so they stay correct if the
# row order or the set of retained stations changes; for the 18-row
# A, C, E, G, H, J, K, L, M table (two depths each, rows sorted by station)
# they are identical to the previous hard-coded vector.
remST_station_ids <- as.character(ant_community_final_calc_grp_remStations$station)
grouped_rownames_remST <- paste(
  remST_station_ids,
  ave(seq_along(remST_station_ids), remST_station_ids, FUN = seq_along),
  sep = "_"
)
rownames(ant_community_final_functional_grp_fixed_remST) <- grouped_rownames_remST
## Warning: Setting row names on a tibble is deprecated.
rownames(ant_community_final_calc_bacterivory_grp_fixed_remST) <- grouped_rownames_remST
## Warning: Setting row names on a tibble is deprecated.
rownames(ant_community_final_calc_env_grp_fixed_remST) <- grouped_rownames_remST
## Warning: Setting row names on a tibble is deprecated.
rownames(ant_community_final_calc_grp_remStations) <- grouped_rownames_remST
## Warning: Setting row names on a tibble is deprecated.
# Centred, unit-variance PCAs. `scale.` is prcomp()'s actual argument name;
# `scale = TRUE` only worked through partial argument matching.
functional_prcomp_grp_remST <- prcomp(ant_community_final_functional_grp_fixed_remST, center = TRUE, scale. = TRUE)
bacterivory_prcomp_grp_remST <- prcomp(ant_community_final_calc_bacterivory_grp_fixed_remST, center = TRUE, scale. = TRUE)

#functional plot w/stations rem data
autoplot(
  functional_prcomp_grp_remST,
  data = as.data.frame(ant_community_final_calc_grp_remStations),
  label = TRUE, label.size = 2.5,
  loadings.label = TRUE, loadings.label.size = 3,
  loadings.label.colour = "black",
  colour = "depth",
  label.repel = TRUE
)

#bacterivory plot w/stations rem data
autoplot(
  bacterivory_prcomp_grp_remST,
  data = as.data.frame(ant_community_final_calc_grp_remStations),
  label = TRUE, label.size = 2.5,
  loadings.label = TRUE, loadings.label.size = 3,
  loadings.label.colour = "black",
  colour = "depth",
  label.repel = TRUE
)

# Split the abundance table so phototrophic and bacterivorous groups can be
# compared separately: columns 1 and 3 for PP, columns 3 and 4 for hetero
# (positions within the four-column ANAN_mL:HNAN_mL table).
ant_community_final_functional_grp_fixed_remST_PP <- ant_community_final_functional_grp_fixed_remST[, c(1, 3)]
ant_community_final_functional_grp_fixed_remST_hetero <- ant_community_final_functional_grp_fixed_remST[, 3:4]

rownames(ant_community_final_functional_grp_fixed_remST_PP) <- grouped_rownames_remST
## Warning: Setting row names on a tibble is deprecated.
rownames(ant_community_final_functional_grp_fixed_remST_hetero) <- grouped_rownames_remST
## Warning: Setting row names on a tibble is deprecated.
# NOTE: despite its name, bacterivory_prcomp_grp_remST_hetero is a PCA of the
# bacterivore *abundances*, not of the bacterivory rates.
# `scale.` is spelled out; `scale = TRUE` relied on partial argument matching.
functional_prcomp_grp_remST_PP <- prcomp(ant_community_final_functional_grp_fixed_remST_PP, center = TRUE, scale. = TRUE)
bacterivory_prcomp_grp_remST_hetero <- prcomp(ant_community_final_functional_grp_fixed_remST_hetero, center = TRUE, scale. = TRUE)

#PP functional plot w/stations removed
autoplot(
  functional_prcomp_grp_remST_PP,
  data = as.data.frame(ant_community_final_calc_grp_remStations),
  label = TRUE, label.size = 2.5,
  loadings.label = TRUE, loadings.label.size = 3,
  loadings.label.colour = "black",
  colour = "depth",
  label.repel = TRUE
)

#Hetero functional plot w/stations removed
autoplot(
  bacterivory_prcomp_grp_remST_hetero,
  data = as.data.frame(ant_community_final_calc_grp_remStations),
  label = TRUE, label.size = 2.5,
  loadings.label = TRUE, loadings.label.size = 3,
  loadings.label.colour = "black",
  colour = "depth",
  label.repel = TRUE
)

#Now I want to make the plots to include important environmental variables
#Need to decompose the scores into dataframes and build a custom plot from there.
#Step 1: envfit on each PCA
#Step 2: dataframes of scores and species from initial PCAs
#Step 3: Readd any grouping variables such as "Group" or "depth"
#Step 4: Extract envfit scores: separate for continuous or discrete variables (only cont for this) "vectors" for continuous and "factors" for discrete variables.
#Step 5: build the plot

#All the fits in this same section---------
# Fit every column of the station-filtered environmental table onto each PCA,
# with 999 permutations for the significance test. `permutations` is spelled
# out; the former `perm = 999` relied on partial argument matching.
# NOTE: the permutation p-values are random, so the "rows of significance"
# recorded below can shift between runs unless a seed is fixed beforehand.
functional_prcomp_grp_remST_fit <- envfit(functional_prcomp_grp_remST ~ ., ant_community_final_calc_env_grp_remST, permutations = 999)
#rows of significance: 2,5,9:11,15:21,30,31,33
bacterivory_prcomp_grp_remST_fit <- envfit(bacterivory_prcomp_grp_remST ~ ., ant_community_final_calc_env_grp_remST, permutations = 999)
#rows of significance: 5,9,10,11,16:21,30,31,33,34
functional_prcomp_grp_remST_PP_fit <- envfit(functional_prcomp_grp_remST_PP ~ ., ant_community_final_calc_env_grp_remST, permutations = 999)
#rows of significance: 2:7,9:11,15:21,30,31,33
bacterivory_prcomp_grp_remST_hetero_fit <- envfit(bacterivory_prcomp_grp_remST_hetero ~ ., ant_community_final_calc_env_grp_remST, permutations = 999)
#rows of significance: 2,4,6,7,9:11,16:20,30

#Now I will do each plot in their own chunks

#All functional groups plot-------
# Site scores and variable ("species") loadings of the functional-group PCA,
# with depth and Group re-attached for colouring.
functional_prcomp_grp_remST_scores <- as.data.frame(scores(functional_prcomp_grp_remST))
functional_prcomp_grp_remST_species <- as.data.frame(scores(functional_prcomp_grp_remST, display = "species"))
functional_prcomp_grp_remST_scores$depth <- ant_community_final_calc_grp_remStations$depth
functional_prcomp_grp_remST_scores$Group <- ant_community_final_calc_grp_remStations$Group
# Envfit vector scores, doubled in length, for drawing on top of the PCA.
functional_prcomp_grp_remST_fit_scores <- as.data.frame(scores(functional_prcomp_grp_remST_fit, "vectors"))*2

# Only the envfit rows recorded as significant for this ordination are drawn.
functional_sig_vectors <- functional_prcomp_grp_remST_fit_scores[c(2,5,9:11,15:21,30,31,33), ]

# Layers shared by both versions of the plot: black environmental vectors with
# bold labels just below the arrow tips, red response-variable vectors with
# labels, and small site labels.
functional_overlay_layers <- list(
  geom_segment(
    data = functional_sig_vectors,
    aes(x = 0, y = 0, xend = functional_sig_vectors[, 1], yend = functional_sig_vectors[, 2])
  ),
  geom_text(
    data = functional_sig_vectors,
    aes(x = functional_sig_vectors[, 1], y = functional_sig_vectors[, 2] - 0.04),
    fontface = "bold",
    label = rownames(functional_sig_vectors)
  ),
  geom_segment(
    data = functional_prcomp_grp_remST_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "red"
  ),
  geom_text(
    data = functional_prcomp_grp_remST_species,
    aes(x = PC1, y = PC2),
    label = rownames(functional_prcomp_grp_remST_species)
  ),
  geom_text(size = 3, label = rownames(functional_prcomp_grp_remST_scores))
)

#By Group
ggplot(data = functional_prcomp_grp_remST_scores, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = Group)) +
  functional_overlay_layers

#By Depth
ggplot(data = functional_prcomp_grp_remST_scores, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = depth)) +
  functional_overlay_layers

#Bacterivorous activity plot---------
# Site scores and variable loadings of the bacterivory-rate PCA, with depth and
# Group re-attached; envfit vector scores are doubled in length for plotting.
bacterivory_prcomp_grp_remST_scores <- as.data.frame(scores(bacterivory_prcomp_grp_remST))
bacterivory_prcomp_grp_remST_species <- as.data.frame(scores(bacterivory_prcomp_grp_remST, display = "species"))
bacterivory_prcomp_grp_remST_scores$depth <- ant_community_final_calc_grp_remStations$depth
bacterivory_prcomp_grp_remST_scores$Group <- ant_community_final_calc_grp_remStations$Group
bacterivory_prcomp_grp_remST_fit_scores <- as.data.frame(scores(bacterivory_prcomp_grp_remST_fit, "vectors"))*2

# Only the envfit rows recorded as significant for this ordination are drawn.
bacterivory_sig_vectors <- bacterivory_prcomp_grp_remST_fit_scores[c(5,9,10,11,16:21,30,31,33,34), ]

# Layers shared by both versions of the plot; site labels sit 0.1 below the
# points.
bacterivory_overlay_layers <- list(
  geom_segment(
    data = bacterivory_sig_vectors,
    aes(x = 0, y = 0, xend = bacterivory_sig_vectors[, 1], yend = bacterivory_sig_vectors[, 2])
  ),
  geom_text(
    data = bacterivory_sig_vectors,
    aes(x = bacterivory_sig_vectors[, 1], y = bacterivory_sig_vectors[, 2] - 0.04),
    fontface = "bold",
    label = rownames(bacterivory_sig_vectors)
  ),
  geom_segment(
    data = bacterivory_prcomp_grp_remST_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "red"
  ),
  geom_text(
    data = bacterivory_prcomp_grp_remST_species,
    aes(x = PC1, y = PC2),
    label = rownames(bacterivory_prcomp_grp_remST_species)
  ),
  geom_text(
    aes(x = PC1, y = PC2 - 0.1),
    size = 3,
    label = rownames(bacterivory_prcomp_grp_remST_scores)
  )
)

#By Group
ggplot(data = bacterivory_prcomp_grp_remST_scores, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = Group)) +
  bacterivory_overlay_layers

#By Depth
ggplot(data = bacterivory_prcomp_grp_remST_scores, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = depth)) +
  bacterivory_overlay_layers

#All PP function group comparison plot---------
# Site scores and variable loadings of the phototroph (ANAN + MNAN) PCA, with
# depth and Group re-attached; envfit vector scores are doubled in length.
functional_prcomp_grp_remST_PP_scores <- as.data.frame(scores(functional_prcomp_grp_remST_PP))
functional_prcomp_grp_remST_PP_species <- as.data.frame(scores(functional_prcomp_grp_remST_PP, display = "species"))
functional_prcomp_grp_remST_PP_scores$depth <- ant_community_final_calc_grp_remStations$depth
functional_prcomp_grp_remST_PP_scores$Group <- ant_community_final_calc_grp_remStations$Group
functional_prcomp_grp_remST_PP_fit_scores <- as.data.frame(scores(functional_prcomp_grp_remST_PP_fit, "vectors"))*2

# Only the envfit rows recorded as significant for this ordination are drawn.
PP_sig_vectors <- functional_prcomp_grp_remST_PP_fit_scores[c(2:7,9:11,15:21,30,31,33), ]

# Layers shared by both versions of the plot.
PP_overlay_layers <- list(
  geom_segment(
    data = PP_sig_vectors,
    aes(x = 0, y = 0, xend = PP_sig_vectors[, 1], yend = PP_sig_vectors[, 2])
  ),
  geom_text(
    data = PP_sig_vectors,
    aes(x = PP_sig_vectors[, 1], y = PP_sig_vectors[, 2] - 0.04),
    fontface = "bold",
    label = rownames(PP_sig_vectors)
  ),
  geom_segment(
    data = functional_prcomp_grp_remST_PP_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "red"
  ),
  geom_text(
    data = functional_prcomp_grp_remST_PP_species,
    aes(x = PC1, y = PC2),
    label = rownames(functional_prcomp_grp_remST_PP_species)
  ),
  geom_text(size = 2, label = rownames(functional_prcomp_grp_remST_PP_scores))
)

#By Group
ggplot(data = functional_prcomp_grp_remST_PP_scores, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = Group)) +
  PP_overlay_layers

#By Depth
ggplot(data = functional_prcomp_grp_remST_PP_scores, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = depth)) +
  PP_overlay_layers

#All Hetero function group comparison plot------------
# Site scores and variable loadings of the bacterivore-abundance PCA, with
# depth and Group re-attached; envfit vector scores are doubled in length.
bacterivory_prcomp_grp_remST_hetero_scores <- as.data.frame(scores(bacterivory_prcomp_grp_remST_hetero))
bacterivory_prcomp_grp_remST_hetero_species <- as.data.frame(scores(bacterivory_prcomp_grp_remST_hetero, display = "species"))
bacterivory_prcomp_grp_remST_hetero_scores$depth <- ant_community_final_calc_grp_remStations$depth
bacterivory_prcomp_grp_remST_hetero_scores$Group <- ant_community_final_calc_grp_remStations$Group
bacterivory_prcomp_grp_remST_hetero_fit_scores <- as.data.frame(scores(bacterivory_prcomp_grp_remST_hetero_fit, "vectors"))*2

# Only the envfit rows recorded as significant for this ordination are drawn.
hetero_sig_vectors <- bacterivory_prcomp_grp_remST_hetero_fit_scores[c(2,4,6,7,9:11,16:20,30), ]

# Layers shared by both versions of the plot; site labels sit 0.1 below the
# points.
hetero_overlay_layers <- list(
  geom_segment(
    data = hetero_sig_vectors,
    aes(x = 0, y = 0, xend = hetero_sig_vectors[, 1], yend = hetero_sig_vectors[, 2])
  ),
  geom_text(
    data = hetero_sig_vectors,
    aes(x = hetero_sig_vectors[, 1], y = hetero_sig_vectors[, 2] - 0.04),
    fontface = "bold",
    label = rownames(hetero_sig_vectors)
  ),
  geom_segment(
    data = bacterivory_prcomp_grp_remST_hetero_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "red"
  ),
  geom_text(
    data = bacterivory_prcomp_grp_remST_hetero_species,
    aes(x = PC1, y = PC2),
    label = rownames(bacterivory_prcomp_grp_remST_hetero_species)
  ),
  geom_text(
    aes(x = PC1, y = PC2 - 0.1),
    size = 3,
    label = rownames(bacterivory_prcomp_grp_remST_hetero_scores)
  )
)

#By Group
ggplot(data = bacterivory_prcomp_grp_remST_hetero_scores, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = Group)) +
  hetero_overlay_layers

#By Depth
ggplot(data = bacterivory_prcomp_grp_remST_hetero_scores, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = depth)) +
  hetero_overlay_layers

#I have too many co-correlated/redundant variables which may be hindering or hiding signals from important variables. Curating list of variables to keep/throw out.

#KEEP TS ZML ONLY
#remove: spar (pressure), timesS (elapsed time/length of cast), scan (ctd scan number), nbin (# of data cycles to create an average at each depth/bin interval), prDM(pressure/same thing as depth), depSM (depth), altM (altitude of sampler above bottom), flourescence *shows some diff from Chla albeit minimal*, Total DIN (mainly NO2/NO3 so it's redundant), bact_mL (bprod is a more accurate representation of what's happening with bacteria), either Oxy or oxy sat, and maybe N/P ratio ***leaving it in for now***.

#Now I am going to redo all the fit analysis with removing the cocorrelates or redundant variables that could be influencing the significance of potentially important variables. Then use that analysis to inform a PerMANOVA with an analysis of variance using only the important variables.



#NEED to scale the original data and use vegdist to get a matrix input and NOT the raw prcomp to use adonis...
#I have found a script that describes how to do model selection using adonis output by kdyson
#github link: https://github.com/kdyson/R_Scripts
#bacterivory_adonis <- adonis(vegdist(scale(ant_community_final_calc_bacterivory_grp_fixed_remST), "euclidean") ~ bprod+NH4+PAR+Chla+ZML+Time+NO2_NO3+Total_DIN+N_P_Ratio, data = ant_community_final_calc_env_grp_remST, perm = 9999)
#Now I am going to redo all the fit analysis with removing the cocorrelates or redundant variables that could be influencing the significance of potentially important variables. Then use that analysis to inform a PerMANOVA with an analysis of variance using only the important variables.

#using objects created from the previous code chunk directly above.
#namely ant_community_final_calc_grp_remStations, ant_community_final_functional_grp_remST, ant_community_final_calc_bacterivory_grp_remST, ant_community_final_calc_env_grp_remST

#List of correlates to remove or possibly keep...
#KEEP TS ZML ONLY
#remove: spar (pressure), timeS (elapsed time/length of cast), scan (CTD scan number), nbin (# of data cycles averaged at each depth/bin interval), prDM (pressure; same thing as depth), depSM (depth), altM (altitude of sampler above bottom), fluorescence *shows some difference from Chla, albeit minimal*, Total DIN (mainly NO2/NO3, so it's redundant), bact_mL (bprod is a more accurate representation of what's happening with bacteria), either Oxy or oxy sat, and maybe N/P ratio ***leaving it in for now***.

#Removing the redundant variables from ant_community_final_calc_env_grp_remST

#Vector containing the names of variables I wanna remove (i can add or remove anytime I want by creating a vector for easier coding)

# Column names of the redundant / co-correlated environmental variables to drop.
redundant_vars_vector <- c(
  "ZML_T_m..0.2ºC.", "ZML_TSP_m..0.023Kg.m3.", "ZE", "ZCM_m..maxFluo.",
  "spar", "scan", "nbin", "depSM", "altM", "prDM",
  "Total_DIN", "Bact_mL", "timeS", "Ze_m..1.."
)

# Keep every column whose name is not listed in redundant_vars_vector.
ant_community_final_calc_env_grp_remST_remREDUND <-
  ant_community_final_calc_env_grp_remST[
    , !(colnames(ant_community_final_calc_env_grp_remST) %in% redundant_vars_vector)
  ]

#I don't need to redo the original PCA analysis of each of the responses measured. I only need to remake the envfits and then make new plots.

#Remakes of the envfits here:----
# Re-fit the environmental table (redundant columns removed) onto each of the
# four existing ordinations. The row indices noted after each fit refer to rows
# of that fit's vector table.

functional_prcomp_grp_remST_remREDUND_fit <- envfit(
  functional_prcomp_grp_remST ~ .,
  data = ant_community_final_calc_env_grp_remST_remREDUND,
  permutations = 999
)
# rows of significance: 2,5,6,9:11,13:15,19,20
# Vars for adonis: Latitude, NO2_NO3, fluorescence, ice, ZML, PAR, oxygen

bacterivory_prcomp_grp_remST_remREDUND_fit <- envfit(
  bacterivory_prcomp_grp_remST ~ .,
  data = ant_community_final_calc_env_grp_remST_remREDUND,
  permutations = 999
)
# rows of significance: 5,6,10,11,13:15,19,20,22
# Vars for adonis: Latitude, fluorescence, ZML, NH4, Chla, bprod, NO2_NO3

functional_prcomp_grp_remST_PP_remREDUND_fit <- envfit(
  functional_prcomp_grp_remST_PP ~ .,
  data = ant_community_final_calc_env_grp_remST_remREDUND,
  permutations = 999
)
# rows of significance: 2,4:6,9:11,13:15,19,20
# Vars for adonis: latitude, NO2_NO3, ZML, ice, depth_m, Chla, oxygen, fluorescence, beamTrans, Time, NH4, (PAR *close but not sig*)

bacterivory_prcomp_grp_remST_hetero_remREDUND_fit <- envfit(
  bacterivory_prcomp_grp_remST_hetero ~ .,
  data = ant_community_final_calc_env_grp_remST_remREDUND,
  permutations = 999
)
# rows of significance: 2,4,6,10:14,19
# Vars for adonis: PAR, NH4, fluorescence, ZML, depth_m, ice, (latitude *close but not sig*)

###***IMPORTANT NOTE*** - row 4 = depth_m. I removed it from the plots for the sake of the presentation, but I need to think about its importance for the UPDATED second presentation, as it showed as significant in multiple PCA analyses. I will run an adonis now but will run my model selection for the UPDATE.

#Now I need to remake all of the plots I made previously with the new dataset
#I'm going to copy and replace my code with the new envfit objects and significant vars.
#**NEEEEEED** to replace ZML name but not sure where in the code makes the most sense.
#All bad names have been replaced.

#Now I will do each plot in their own chunks

#All functional groups plot Redundancy Removed-------
#scores and species of initial PCA of functional responses
# Site scores and species scores of the initial PCA of functional responses;
# species scores are multiplied by 2 before plotting.
functional_prcomp_grp_remST_scores_noRed <- as.data.frame(
  scores(functional_prcomp_grp_remST)
)
functional_prcomp_grp_remST_species_noRed <- as.data.frame(
  scores(functional_prcomp_grp_remST, display = "species")
) * 2
# Sample metadata used to colour the points.
functional_prcomp_grp_remST_scores_noRed$depth <- ant_community_final_calc_grp_remStations$depth
functional_prcomp_grp_remST_scores_noRed$Group <- ant_community_final_calc_grp_remStations$Group
# Envfit vector scores (multiplied by 2.4) to overlay on the PCA they were fit to.
functional_prcomp_grp_remST_fit_scores_noRed <- as.data.frame(
  scores(functional_prcomp_grp_remST_remREDUND_fit, "vectors")
) * 2.4

# Significant envfit rows only; the first two columns are the arrow end points.
functional_sig_env_vectors_noRed <- functional_prcomp_grp_remST_fit_scores_noRed[
  c(2, 5, 6, 9:11, 13:15, 19, 20), 1:2
]
names(functional_sig_env_vectors_noRed) <- c("arrow_x", "arrow_y")

# By Group
ggplot(functional_prcomp_grp_remST_scores_noRed, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = Group)) +
  geom_segment(
    data = functional_sig_env_vectors_noRed,
    mapping = aes(x = 0, y = 0, xend = arrow_x, yend = arrow_y)
  ) +
  geom_text_repel(
    data = functional_sig_env_vectors_noRed,
    mapping = aes(x = arrow_x + 0.1, y = arrow_y),
    label = rownames(functional_sig_env_vectors_noRed),
    fontface = "bold",
    nudge_y = -0.03
  ) +
  geom_segment(
    data = functional_prcomp_grp_remST_species_noRed,
    mapping = aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "red"
  ) +
  geom_text_repel(
    data = functional_prcomp_grp_remST_species_noRed,
    mapping = aes(x = PC1 + 0.1, y = PC2 - 0.09),
    label = rownames(functional_prcomp_grp_remST_species_noRed),
    color = "red",
    fontface = "bold"
  ) +
  geom_text(
    aes(x = PC1, y = PC2 + 0.1),
    label = rownames(functional_prcomp_grp_remST_scores_noRed),
    size = 3
  )

# By Depth
ggplot(functional_prcomp_grp_remST_scores_noRed, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = depth)) +
  geom_segment(
    data = functional_sig_env_vectors_noRed,
    mapping = aes(x = 0, y = 0, xend = arrow_x, yend = arrow_y)
  ) +
  geom_text_repel(
    data = functional_sig_env_vectors_noRed,
    mapping = aes(x = arrow_x, y = arrow_y - 0.04),
    label = rownames(functional_sig_env_vectors_noRed),
    fontface = "bold"
  ) +
  geom_segment(
    data = functional_prcomp_grp_remST_species_noRed,
    mapping = aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "red"
  ) +
  geom_text_repel(
    data = functional_prcomp_grp_remST_species_noRed,
    mapping = aes(x = PC1 + 0.1, y = PC2 - 0.09),
    label = rownames(functional_prcomp_grp_remST_species_noRed),
    color = "red",
    fontface = "bold"
  ) +
  geom_text(
    aes(x = PC1, y = PC2 + 0.1),
    label = rownames(functional_prcomp_grp_remST_scores_noRed),
    size = 3
  )

#Bacterivorous activity plot Redundancy Removed---------
# Site scores, species scores (x2) and envfit vector scores (x2) for the
# bacterivory PCA, plus the sample metadata used for colouring.
bacterivory_prcomp_grp_remST_scores_noRed <- as.data.frame(
  scores(bacterivory_prcomp_grp_remST)
)
bacterivory_prcomp_grp_remST_species_noRed <- as.data.frame(
  scores(bacterivory_prcomp_grp_remST, display = "species")
) * 2
bacterivory_prcomp_grp_remST_scores_noRed$depth <- ant_community_final_calc_grp_remStations$depth
bacterivory_prcomp_grp_remST_scores_noRed$Group <- ant_community_final_calc_grp_remStations$Group
bacterivory_prcomp_grp_remST_fit_scores_noRed <- as.data.frame(
  scores(bacterivory_prcomp_grp_remST_remREDUND_fit, "vectors")
) * 2

# Significant envfit rows only; the first two columns are the arrow end points.
bacterivory_sig_env_vectors_noRed <- bacterivory_prcomp_grp_remST_fit_scores_noRed[
  c(5, 6, 10, 11, 13:15, 19, 20, 22), 1:2
]
names(bacterivory_sig_env_vectors_noRed) <- c("arrow_x", "arrow_y")

# Layers drawn on top of the points; identical for the Group and depth plots.
bacterivory_overlay_layers_noRed <- list(
  geom_segment(
    data = bacterivory_sig_env_vectors_noRed,
    mapping = aes(x = 0, y = 0, xend = arrow_x, yend = arrow_y)
  ),
  geom_text_repel(
    data = bacterivory_sig_env_vectors_noRed,
    mapping = aes(x = arrow_x, y = arrow_y - 0.04),
    label = rownames(bacterivory_sig_env_vectors_noRed),
    fontface = "bold",
    nudge_x = -0.1
  ),
  geom_segment(
    data = bacterivory_prcomp_grp_remST_species_noRed,
    mapping = aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "red"
  ),
  geom_text_repel(
    data = bacterivory_prcomp_grp_remST_species_noRed,
    mapping = aes(x = PC1, y = PC2),
    label = rownames(bacterivory_prcomp_grp_remST_species_noRed),
    nudge_y = 0.04,
    color = "red",
    fontface = "bold"
  ),
  geom_text(
    aes(x = PC1, y = PC2 + 0.1),
    label = rownames(bacterivory_prcomp_grp_remST_scores_noRed),
    size = 3
  )
)

# By Group
ggplot(bacterivory_prcomp_grp_remST_scores_noRed, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = Group)) +
  bacterivory_overlay_layers_noRed

# By Depth
ggplot(bacterivory_prcomp_grp_remST_scores_noRed, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = depth)) +
  bacterivory_overlay_layers_noRed

#All PP function group comparison plot Redundancy Removed---------
# Site scores, species scores (x2) and envfit vector scores (x2) for the PP
# PCA, plus the sample metadata used for colouring.
functional_prcomp_grp_remST_PP_scores_noRed <- as.data.frame(
  scores(functional_prcomp_grp_remST_PP)
)
functional_prcomp_grp_remST_PP_species_noRed <- as.data.frame(
  scores(functional_prcomp_grp_remST_PP, display = "species")
) * 2
functional_prcomp_grp_remST_PP_scores_noRed$depth <- ant_community_final_calc_grp_remStations$depth
functional_prcomp_grp_remST_PP_scores_noRed$Group <- ant_community_final_calc_grp_remStations$Group
functional_prcomp_grp_remST_PP_fit_scores_noRed <- as.data.frame(
  scores(functional_prcomp_grp_remST_PP_remREDUND_fit, "vectors")
) * 2

# Envfit rows that are plotted; the first two columns are the arrow end points.
functional_PP_sig_env_vectors_noRed <- functional_prcomp_grp_remST_PP_fit_scores_noRed[
  c(2, 5:6, 9:11, 13:15, 19, 20), 1:2
]
names(functional_PP_sig_env_vectors_noRed) <- c("arrow_x", "arrow_y")

# Layers drawn on top of the points; identical for the Group and depth plots.
functional_PP_overlay_layers_noRed <- list(
  geom_segment(
    data = functional_PP_sig_env_vectors_noRed,
    mapping = aes(x = 0, y = 0, xend = arrow_x, yend = arrow_y)
  ),
  geom_text_repel(
    data = functional_PP_sig_env_vectors_noRed,
    mapping = aes(x = arrow_x, y = arrow_y - 0.04),
    label = rownames(functional_PP_sig_env_vectors_noRed),
    fontface = "bold"
  ),
  geom_segment(
    data = functional_prcomp_grp_remST_PP_species_noRed,
    mapping = aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "red"
  ),
  geom_text(
    data = functional_prcomp_grp_remST_PP_species_noRed,
    mapping = aes(x = PC1, y = PC2),
    label = rownames(functional_prcomp_grp_remST_PP_species_noRed),
    color = "red",
    fontface = "bold"
  ),
  geom_text(
    aes(x = PC1, y = PC2 + 0.1),
    label = rownames(functional_prcomp_grp_remST_PP_scores_noRed),
    size = 3
  )
)

# By Group
ggplot(functional_prcomp_grp_remST_PP_scores_noRed, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = Group)) +
  functional_PP_overlay_layers_noRed

# By Depth
ggplot(functional_prcomp_grp_remST_PP_scores_noRed, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = depth)) +
  functional_PP_overlay_layers_noRed

#All Hetero function group comparison plot Redundancy Removed------------
# Site scores, species scores (x2) and envfit vector scores (x2) for the
# heterotroph PCA, plus the sample metadata used for colouring.
bacterivory_prcomp_grp_remST_hetero_scores_noRed <- as.data.frame(
  scores(bacterivory_prcomp_grp_remST_hetero)
)
bacterivory_prcomp_grp_remST_hetero_species_noRed <- as.data.frame(
  scores(bacterivory_prcomp_grp_remST_hetero, display = "species")
) * 2
bacterivory_prcomp_grp_remST_hetero_scores_noRed$depth <- ant_community_final_calc_grp_remStations$depth
bacterivory_prcomp_grp_remST_hetero_scores_noRed$Group <- ant_community_final_calc_grp_remStations$Group
bacterivory_prcomp_grp_remST_hetero_fit_scores_noRed <- as.data.frame(
  scores(bacterivory_prcomp_grp_remST_hetero_remREDUND_fit, "vectors")
) * 2

# Envfit rows that are plotted; the first two columns are the arrow end points.
hetero_sig_env_vectors_noRed <- bacterivory_prcomp_grp_remST_hetero_fit_scores_noRed[
  c(2, 6, 10:14, 19), 1:2
]
names(hetero_sig_env_vectors_noRed) <- c("arrow_x", "arrow_y")

# Layers drawn on top of the points; identical for the Group and depth plots.
hetero_overlay_layers_noRed <- list(
  geom_segment(
    data = hetero_sig_env_vectors_noRed,
    mapping = aes(x = 0, y = 0, xend = arrow_x, yend = arrow_y)
  ),
  geom_text_repel(
    data = hetero_sig_env_vectors_noRed,
    mapping = aes(x = arrow_x, y = arrow_y - 0.04),
    label = rownames(hetero_sig_env_vectors_noRed),
    fontface = "bold"
  ),
  geom_segment(
    data = bacterivory_prcomp_grp_remST_hetero_species_noRed,
    mapping = aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "red"
  ),
  geom_text(
    data = bacterivory_prcomp_grp_remST_hetero_species_noRed,
    mapping = aes(x = PC1, y = PC2),
    label = rownames(bacterivory_prcomp_grp_remST_hetero_species_noRed),
    color = "red",
    fontface = "bold"
  ),
  geom_text(
    aes(x = PC1, y = PC2 + 0.1),
    label = rownames(bacterivory_prcomp_grp_remST_hetero_scores_noRed),
    size = 3
  )
)

# By Group
ggplot(bacterivory_prcomp_grp_remST_hetero_scores_noRed, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = Group)) +
  hetero_overlay_layers_noRed

# By Depth
ggplot(bacterivory_prcomp_grp_remST_hetero_scores_noRed, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = depth)) +
  hetero_overlay_layers_noRed

#A version of EACH plot without the envfit for presentation-------
#All functional groups
#Need to get the % explained of each axis summary(functional_prcomp_grp_remST)
# Functional-group PCA with species arrows and no envfit vectors.
# The axis percentages are hard-coded; see summary(functional_prcomp_grp_remST).
ALL_abundance_PCA_plot <- ggplot(
  functional_prcomp_grp_remST_scores_noRed,
  aes(x = PC1, y = PC2)
) +
  geom_segment(
    data = functional_prcomp_grp_remST_species_noRed,
    mapping = aes(x = 0, y = 0, xend = PC1, yend = PC2),
    arrow = arrow(length = unit(0.1, "inches")),
    color = "red"
  ) +
  geom_point(aes(color = Group, shape = depth), size = 2) +
  geom_text_repel(
    data = functional_prcomp_grp_remST_species_noRed,
    mapping = aes(x = PC1, y = PC2 - 0.11),
    label = rownames(functional_prcomp_grp_remST_species_noRed),
    color = "black",
    fontface = "bold"
  ) +
  geom_text_repel(
    aes(x = PC1, y = PC2 + 0.1),
    label = rownames(functional_prcomp_grp_remST_scores_noRed),
    size = 3,
    color = "grey3"
  ) +
  labs(shape = "Depth", x = "PC1 (70.12%)", y = "PC2 (16.12%)") +
  theme_bw() +
  theme(panel.grid = element_blank())

ALL_abundance_PCA_plot

# Write the figure to the manuscript figure folder (device-default size).
ggsave(
  "ALL_abundance_PCA.png",
  plot = ALL_abundance_PCA_plot,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
# Same functional-group PCA, points coloured by depth.
ggplot(functional_prcomp_grp_remST_scores_noRed, aes(x = PC1, y = PC2)) +
  geom_segment(
    data = functional_prcomp_grp_remST_species_noRed,
    mapping = aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "red"
  ) +
  geom_point(aes(color = depth)) +
  geom_text_repel(
    data = functional_prcomp_grp_remST_species_noRed,
    mapping = aes(x = PC1 + 0.1, y = PC2 - 0.09),
    label = rownames(functional_prcomp_grp_remST_species_noRed),
    color = "black",
    fontface = "bold"
  ) +
  geom_text(
    aes(x = PC1, y = PC2 + 0.1),
    label = rownames(functional_prcomp_grp_remST_scores_noRed),
    size = 3
  )

#Bacterivory
#Version for the publication
# Bacterivory PCA with species arrows; the axis percentages are hard-coded.
Bacterivory_SS_day_PCA_plot <- ggplot(
  bacterivory_prcomp_grp_remST_scores_noRed,
  aes(x = PC1, y = PC2)
) +
  geom_segment(
    data = bacterivory_prcomp_grp_remST_species_noRed,
    mapping = aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "red"
  ) +
  geom_point(aes(color = Group, shape = depth), size = 2) +
  geom_text_repel(
    data = bacterivory_prcomp_grp_remST_species_noRed,
    mapping = aes(x = PC1 + 0.1, y = PC2 - 0.09),
    label = rownames(bacterivory_prcomp_grp_remST_species_noRed),
    color = "black",
    fontface = "bold"
  ) +
  geom_text_repel(
    aes(x = PC1, y = PC2 + 0.1),
    label = rownames(bacterivory_prcomp_grp_remST_scores_noRed),
    size = 3
  ) +
  theme_bw() +
  theme(panel.grid = element_blank()) +
  labs(x = "PC1 (84%)", y = "PC2 (16%)")

#summary(bacterivory_prcomp_grp_remST)

# Show the plot, then save it to the manuscript figure folder.
Bacterivory_SS_day_PCA_plot

ggsave(
  "Bacterivory_SS_day_PCA_plot.png",
  plot = Bacterivory_SS_day_PCA_plot,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
# Bacterivory PCA without envfit vectors, coloured by Group.
ggplot(bacterivory_prcomp_grp_remST_scores_noRed, aes(x = PC1, y = PC2)) +
  geom_segment(
    data = bacterivory_prcomp_grp_remST_species_noRed,
    mapping = aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "red"
  ) +
  geom_point(aes(color = Group)) +
  geom_text_repel(
    data = bacterivory_prcomp_grp_remST_species_noRed,
    mapping = aes(x = PC1 + 0.1, y = PC2 - 0.09),
    label = rownames(bacterivory_prcomp_grp_remST_species_noRed),
    color = "red",
    fontface = "bold"
  ) +
  geom_text(
    aes(x = PC1, y = PC2 + 0.1),
    label = rownames(bacterivory_prcomp_grp_remST_scores_noRed),
    size = 3
  )

# Bacterivory PCA without envfit vectors, coloured by depth.
ggplot(bacterivory_prcomp_grp_remST_scores_noRed, aes(x = PC1, y = PC2)) +
  geom_segment(
    data = bacterivory_prcomp_grp_remST_species_noRed,
    mapping = aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "red"
  ) +
  geom_point(aes(color = depth)) +
  geom_text_repel(
    data = bacterivory_prcomp_grp_remST_species_noRed,
    mapping = aes(x = PC1 + 0.5, y = PC2),
    label = rownames(bacterivory_prcomp_grp_remST_species_noRed),
    color = "red",
    fontface = "bold"
  ) +
  geom_text(
    aes(x = PC1, y = PC2 + 0.1),
    label = rownames(bacterivory_prcomp_grp_remST_scores_noRed),
    size = 3
  )

#PP
# PP PCA without envfit vectors, coloured by Group.
ggplot(functional_prcomp_grp_remST_PP_scores_noRed, aes(x = PC1, y = PC2)) +
  geom_segment(
    data = functional_prcomp_grp_remST_PP_species_noRed,
    mapping = aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "red"
  ) +
  geom_point(aes(color = Group)) +
  geom_text_repel(
    data = functional_prcomp_grp_remST_PP_species_noRed,
    mapping = aes(x = PC1 + 0.1, y = PC2 - 0.09),
    label = rownames(functional_prcomp_grp_remST_PP_species_noRed),
    color = "red",
    fontface = "bold"
  ) +
  geom_text_repel(
    aes(x = PC1, y = PC2 + 0.1),
    label = rownames(functional_prcomp_grp_remST_PP_scores_noRed),
    size = 3
  )

# PP PCA without envfit vectors, coloured by depth.
ggplot(functional_prcomp_grp_remST_PP_scores_noRed, aes(x = PC1, y = PC2)) +
  geom_segment(
    data = functional_prcomp_grp_remST_PP_species_noRed,
    mapping = aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "red"
  ) +
  geom_point(aes(color = depth)) +
  geom_text(
    data = functional_prcomp_grp_remST_PP_species_noRed,
    mapping = aes(x = PC1 + 0.5, y = PC2),
    label = rownames(functional_prcomp_grp_remST_PP_species_noRed),
    color = "red",
    fontface = "bold"
  ) +
  geom_text_repel(
    aes(x = PC1, y = PC2 + 0.1),
    label = rownames(functional_prcomp_grp_remST_PP_scores_noRed),
    size = 3
  )

#Heterotroph
# Heterotroph PCA without envfit vectors, coloured by Group.
ggplot(bacterivory_prcomp_grp_remST_hetero_scores_noRed, aes(x = PC1, y = PC2)) +
  geom_segment(
    data = bacterivory_prcomp_grp_remST_hetero_species_noRed,
    mapping = aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "red"
  ) +
  geom_point(aes(color = Group)) +
  geom_text_repel(
    data = bacterivory_prcomp_grp_remST_hetero_species_noRed,
    mapping = aes(x = PC1 + 0.3, y = PC2 - 0.09),
    label = rownames(bacterivory_prcomp_grp_remST_hetero_species_noRed),
    color = "red",
    fontface = "bold"
  ) +
  geom_text(
    aes(x = PC1, y = PC2 + 0.1),
    label = rownames(bacterivory_prcomp_grp_remST_hetero_scores_noRed),
    size = 3
  )

# Heterotroph PCA without envfit vectors, coloured by depth.
ggplot(bacterivory_prcomp_grp_remST_hetero_scores_noRed, aes(x = PC1, y = PC2)) +
  geom_segment(
    data = bacterivory_prcomp_grp_remST_hetero_species_noRed,
    mapping = aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "red"
  ) +
  geom_point(aes(color = depth)) +
  geom_text_repel(
    data = bacterivory_prcomp_grp_remST_hetero_species_noRed,
    mapping = aes(x = PC1 + 0.1, y = PC2 - 0.09),
    label = rownames(bacterivory_prcomp_grp_remST_hetero_species_noRed),
    color = "red",
    fontface = "bold"
  ) +
  geom_text(
    aes(x = PC1, y = PC2 + 0.1),
    label = rownames(bacterivory_prcomp_grp_remST_hetero_scores_noRed),
    size = 3
  )

#I didn't like this version, as it doesn't give much information on how the predictor variables relate to the ordination. Redoing this plot with just the env vars and NOT the species used to make the PCA.
#All_abundance_PCA with env vars--------
# Significant envfit rows of the functional fit; the first two columns are the
# arrow end points.
functional_sig_env_vectors_envVars <- functional_prcomp_grp_remST_fit_scores_noRed[
  c(2, 5, 6, 9:11, 13:15, 19, 20), 1:2
]
names(functional_sig_env_vectors_envVars) <- c("arrow_x", "arrow_y")

# Functional-group PCA showing only the environmental vectors (no species arrows).
ALL_abundance_PCA_plot_envVars <- ggplot(
  functional_prcomp_grp_remST_scores_noRed,
  aes(x = PC1, y = PC2)
) +
  geom_point(aes(color = Group, shape = depth), size = 2) +
  geom_segment(
    data = functional_sig_env_vectors_envVars,
    mapping = aes(x = 0, y = 0, xend = arrow_x, yend = arrow_y),
    arrow = arrow(length = unit(0.1, "inches"))
  ) +
  geom_text_repel(
    data = functional_sig_env_vectors_envVars,
    mapping = aes(x = arrow_x, y = arrow_y - 0.04),
    label = rownames(functional_sig_env_vectors_envVars),
    fontface = "bold"
  ) +
  geom_text_repel(
    aes(x = PC1, y = PC2 + 0.1),
    label = rownames(functional_prcomp_grp_remST_scores_noRed),
    size = 3,
    color = "grey3"
  ) +
  labs(shape = "Depth", x = "PC1 (70.12%)", y = "PC2 (16.12%)") +
  theme_bw() +
  theme(panel.grid = element_blank())

ALL_abundance_PCA_plot_envVars

ggsave(
  "ALL_abundance_PCA_plot_envVars.pdf",
  plot = ALL_abundance_PCA_plot_envVars,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
#Without latitude and longitude-------
#NEW fit without LAT and LONG
# Drop latitude and longitude from the reduced environmental table, then redo
# the envfit on the functional PCA.
ant_community_final_calc_env_grp_remST_remREDUND_noLATLONG <- select(
  ant_community_final_calc_env_grp_remST_remREDUND,
  -c(latitude, longitude)
)

functional_prcomp_grp_remST_remREDUND_noLATLONG_fit <- envfit(
  functional_prcomp_grp_remST ~ .,
  data = ant_community_final_calc_env_grp_remST_remREDUND_noLATLONG,
  permutations = 999
)
# rows of significance: 2,5,6,9:11,13,17,18
# Vars for adonis: ice, Chla, ZML, oxygen, fluorescence, beamTrans, Time, NH4, NO2_NO3
functional_prcomp_grp_remST_remREDUND_noLATLONG_fit_scores <- as.data.frame(
  scores(functional_prcomp_grp_remST_remREDUND_noLATLONG_fit, "vectors")
) * 2.4

# Significant envfit rows only; the first two columns are the arrow end points.
functional_sig_env_vectors_noLATLONG <- functional_prcomp_grp_remST_remREDUND_noLATLONG_fit_scores[
  c(2, 5, 6, 9:11, 13, 17, 18), 1:2
]
names(functional_sig_env_vectors_noLATLONG) <- c("arrow_x", "arrow_y")

# How to plot it
ALL_abundance_PCA_plot_envVars_noLATLONG <- ggplot(
  functional_prcomp_grp_remST_scores_noRed,
  aes(x = PC1, y = PC2)
) +
  geom_point(aes(color = Group, shape = depth), size = 2) +
  geom_segment(
    data = functional_sig_env_vectors_noLATLONG,
    mapping = aes(x = 0, y = 0, xend = arrow_x, yend = arrow_y),
    arrow = arrow(length = unit(0.1, "inches"))
  ) +
  geom_text_repel(
    data = functional_sig_env_vectors_noLATLONG,
    mapping = aes(x = arrow_x, y = arrow_y - 0.04),
    label = rownames(functional_sig_env_vectors_noLATLONG),
    fontface = "bold"
  ) +
  geom_text_repel(
    aes(x = PC1, y = PC2 + 0.1),
    label = rownames(functional_prcomp_grp_remST_scores_noRed),
    size = 3,
    color = "grey3"
  ) +
  labs(shape = "Depth", x = "PC1 (70.12%)", y = "PC2 (16.12%)") +
  theme_bw() +
  theme(panel.grid = element_blank())

ALL_abundance_PCA_plot_envVars_noLATLONG

ggsave(
  "ALL_abundance_PCA_plot_envVars_noLATLONG.pdf",
  plot = ALL_abundance_PCA_plot_envVars_noLATLONG,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
###Chunk from previous attempts
#NEED to scale the original data and use vegdist to get a matrix input and NOT the raw prcomp to use adonis...
#I have found a script that describes how to do model selection using adonis output by kdyson
#github link: https://github.com/kdyson/R_Scripts

#bacterivory_adonis <- adonis(vegdist(scale(ant_community_final_calc_bacterivory_grp_fixed_remST), "euclidean") ~ bprod+NH4+PAR1+Chla+ZML_TS_m..0.023Kg.m3.+timeJ+NO2_NO3+Total_DIN+N_P_Ratio, data = ant_community_final_calc_env_grp_remST, perm = 9999)
###

#The response input must be a distance matrix - I did prcomp, so adonis won't take that object directly. Convert to a dissimilarity matrix using vegdist(), but everything needs to be scaled first.
#All functions object: ant_community_final_functional_grp_fixed_remST
#Bacterivory object: ant_community_final_calc_bacterivory_grp_fixed_remST

# PERMANOVA (adonis2) on Euclidean distances of the scaled bacterivory
# responses, using only four of the envfit-significant variables.
# All sig vars: Latitude, fluorescence, ZML, NH4, Chla, bprod, NO2_NO3
# NOTE(review): the earlier note listed the four chosen variables as
# "Latitude, fluorescence, ZML, NH4", but the formula uses Chla instead of
# fluorescence - confirm which set is intended.
bacterivory_adonis <- adonis2(
  vegdist(
    scale(ant_community_final_calc_bacterivory_grp_fixed_remST),
    method = "euclidean"
  ) ~ latitude + NH4 + ZML + Chla,
  data = ant_community_final_calc_env_grp_remST,
  permutations = 9999,
  method = "euclidean"
)

#Latitude, NO2_NO3, fluorescence, ice
#all sig vars:Latitude, NO2_NO3, fluorescence, ice, ZML,PAR, oxygen
#functional_adonis <- adonis(vegdist(scale(ant_community_final_functional_grp_fixed_remST), "euclidean") ~ latitude+ice+fluorescence+NH4, data = ant_community_final_calc_env_grp_remST, perm = 9999, method = "euclidean")

#This adonis call is not a saved object and used as a test to see the outputs of
#adonis models
#adonis2(functional_dist_matrix ~ latitude+ice+fluorescence+NH4, data = ant_community_final_calc_env_grp_remST, perm = 9999, method = "euclidean")

#Now performing model selection on my results. 
#First source R script files that contain the functions needed to do model selection.

#source("/Users/christophercarnivale/Desktop/Dissertation_data/R_Scripts_adonis_modelselection/AICc_compare.R") doesn't work, just a test script

# Source the PERMANOVA AICc helper script.
source("/Users/christophercarnivale/Desktop/Dissertation_data/R_Scripts_adonis_modelselection/AICc_PERMANOVA.R")

#source("/Users/christophercarnivale/Desktop/Dissertation_data/R_Scripts_adonis_modelselection/AICc_table_generation.R")

# Second: Euclidean distance matrices of the scaled responses, matching the
# input used in the adonis calls, for the AICc table functions.
bacterivory_dist_matrix <- vegdist(
  scale(ant_community_final_calc_bacterivory_grp_fixed_remST),
  method = "euclidean"
)

functional_dist_matrix <- vegdist(
  scale(ant_community_final_functional_grp_fixed_remST),
  method = "euclidean"
)

# Third: vectors holding the chosen significant variables from each envfit.
bacterivory_sig_vars <- c("latitude", "NH4", "ZML", "Chla")

functional_sig_vars <- c("latitude", "ice", "fluorescence", "NH4")

#First test for the output
#AICc.table.Nvar(bacterivory_sig_vars, matrix.char = bacterivory_dist_matrix, perm = 999, n.var = 3, method = "euclidean", df = ant_community_final_calc_env_grp_remST)

#Initial run of the previous sourced functions didn't work with the current structuring of the data and the evolution of the r packages.

#Script has been edited and fixed by me and JD. New sourcing and function here:
# Source the edited AICc table-generation script (used instead of the
# commented-out AICc_table_generation.R). Sourcing it also attaches DescTools.
source("/Users/christophercarnivale/Desktop/Dissertation_data/R_Scripts_adonis_modelselection/AICc_table_generation_edited.R")
## Loading required package: DescTools
## Registered S3 method overwritten by 'DescTools':
##   method         from  
##   reorder.factor gplots
## 
## Attaching package: 'DescTools'
## The following object is masked from 'package:gplots':
## 
##     reorder.factor
#Bacterivory AIC model selection
# Bacterivory responses: AICc tables from AICc.table.Nvar (n.var = 3) and
# AICc.table.all (comb.incl = 1:3 given as c(1,2,3)), both on bacterivory_dist_matrix.
ant_community_final_calc_bacterivory_grp_fixed_remST_MoSelNvar <- AICc.table.Nvar(
  bacterivory_sig_vars,
  matrix.char = bacterivory_dist_matrix,
  perm = 999,
  n.var = 3,
  method = "euclidean",
  df = ant_community_final_calc_env_grp_remST
)

ant_community_final_calc_bacterivory_grp_fixed_remST_MoSelALL <- AICc.table.all(
  bacterivory_sig_vars,
  matrix.char = bacterivory_dist_matrix,
  perm = 999,
  method = "euclidean",
  df = ant_community_final_calc_env_grp_remST,
  comb.incl = c(1,2,3)
)
## [1] 1
## [1] 2
## [1] 3
#Functional AIC Model selection
# Functional responses: the same two AICc tables, on functional_dist_matrix.
ant_community_final_functional_grp_fixed_remST_MoSelNvar <- AICc.table.Nvar(
  functional_sig_vars,
  matrix.char = functional_dist_matrix,
  perm = 999,
  n.var = 3,
  method = "euclidean",
  df = ant_community_final_calc_env_grp_remST
)

ant_community_final_functional_grp_fixed_remST_MoSelALL <- AICc.table.all(
  functional_sig_vars,
  matrix.char = functional_dist_matrix,
  perm = 999,
  method = "euclidean",
  df = ant_community_final_calc_env_grp_remST,
  comb.incl = c(1,2,3)
)
## [1] 1
## [1] 2
## [1] 3
#I will quickly add AIC model selections for PP and hetero only functional analysis

#Functional PP model selection
#Sig Vars: latitude, NO2_NO3, ZML, ice, depth_m, Chla, oxygen, flourescence, beamTrans,Time, HN4
# PP responses: AICc tables for latitude, NO2_NO3, ZML and ice, on Euclidean
# distances of the scaled PP matrix (computed inline in each call).
ant_community_final_functional_grp_fixed_remST_PP_MoSelNvar <- AICc.table.Nvar(
  c("latitude", "NO2_NO3", "ZML", "ice"),
  matrix.char = vegdist(scale(ant_community_final_functional_grp_fixed_remST_PP), "euclidean"),
  perm = 999,
  n.var = 3,
  method = "euclidean",
  df = ant_community_final_calc_env_grp_remST
)

ant_community_final_functional_grp_fixed_remST_PP_MoSelALL <- AICc.table.all(
  c("latitude", "NO2_NO3", "ZML", "ice"),
  matrix.char = vegdist(scale(ant_community_final_functional_grp_fixed_remST_PP), "euclidean"),
  perm = 999,
  method = "euclidean",
  df = ant_community_final_calc_env_grp_remST,
  comb.incl = c(1,2,3)
)
## [1] 1
## [1] 2
## [1] 3
#Functional hetero Model selection
#Sig Vars: PAR, NH4, flourescence, ZML, depth_m, ice
# Heterotroph responses: AICc tables for PAR, NH4, fluorescence and ZML, on
# Euclidean distances of the scaled hetero matrix (computed inline in each call).
ant_community_final_functional_grp_fixed_remST_hetero_MoSelNvar <- AICc.table.Nvar(
  c("PAR", "NH4", "fluorescence", "ZML"),
  matrix.char = vegdist(scale(ant_community_final_functional_grp_fixed_remST_hetero), "euclidean"),
  perm = 999,
  n.var = 3,
  method = "euclidean",
  df = ant_community_final_calc_env_grp_remST
)

ant_community_final_functional_grp_fixed_remST_hetero_MoSelALL <- AICc.table.all(
  c("PAR", "NH4", "fluorescence", "ZML"),
  matrix.char = vegdist(scale(ant_community_final_functional_grp_fixed_remST_hetero), "euclidean"),
  perm = 999,
  method = "euclidean",
  df = ant_community_final_calc_env_grp_remST,
  comb.incl = c(1,2,3)
)
## [1] 1
## [1] 2
## [1] 3
#Objects for adonis and envfit from initial feeding results-------
#Listing the objects used for making the adonis/envfit tables, with a basic description of what each contains, for easier readability and look-back. Each group lists: 1) envfit 2) prcomp 3) base matrix

#functional_prcomp_grp_remST_remREDUND_fit - All abundance measures PNAN, ANAN, MNAN, HNAN
#functional_prcomp_grp_remST
#ant_community_final_functional_grp_fixed_remST
#bacterivory_prcomp_grp_remST_remREDUND_fit - Just SS per day of MNAN and HNAN
#bacterivory_prcomp_grp_remST
#ant_community_final_calc_bacterivory_grp_fixed_remST
#functional_prcomp_grp_remST_PP_remREDUND_fit - Abundance of just MNAN and ANAN
#functional_prcomp_grp_remST_PP
#ant_community_final_functional_grp_fixed_remST_PP
#bacterivory_prcomp_grp_remST_hetero_remREDUND_fit - Abundance of just MNAN and HNAN
#bacterivory_prcomp_grp_remST_hetero
#ant_community_final_functional_grp_fixed_remST_hetero

#Adonis Selection and then fit without LAT/LONG---------
# Same call and same scaling as the whole-community selection; the only
# difference is the environmental data frame, which has LAT/LONG removed.
# The resulting table is only opened in the viewer, not stored.
View(
  AICc.table.all(
    c("ice", "Chla", "ZML", "oxygen", "fluorescence", "Time", "NH4", "NO2_NO3"),
    matrix.char = vegdist(
      scale(ant_community_final_functional_grp_fixed_remST),
      "euclidean"
    ),
    perm = 999,
    method = "euclidean",
    df = ant_community_final_calc_env_grp_remST_remREDUND_noLATLONG,
    comb.incl = c(1, 2, 3)
  )
)
## [1] 1
## [1] 2
## [1] 3
# Environmental columns that enter the PCA, picked by position from the
# non-redundant environmental table.
# Converted to a plain data.frame first: the source is a tibble, and setting
# row names on a tibble is deprecated (it raised a warning here before).
ant_community_final_calc_env_grp_remST_remREDUND_forpca <- as.data.frame(
  ant_community_final_calc_env_grp_remST_remREDUND[, c(4:6, 8:13, 18:24)]
)
#need to add rownames real quick first to make labelling easier
rownames(ant_community_final_calc_env_grp_remST_remREDUND_forpca) <- grouped_rownames_remST
# `scale.` is the real argument name of prcomp(); spelling it out avoids
# relying on partial argument matching.
env_prcomp_grp_remST <- prcomp(
  ant_community_final_calc_env_grp_remST_remREDUND_forpca,
  center = TRUE,
  scale. = TRUE
)

summary(env_prcomp_grp_remST)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.8932 1.7361 1.3442 1.03560 0.80118 0.66371 0.53848
## Proportion of Variance 0.5232 0.1884 0.1129 0.06703 0.04012 0.02753 0.01812
## Cumulative Proportion  0.5232 0.7116 0.8245 0.89152 0.93164 0.95917 0.97729
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.41431 0.30443 0.22363 0.17911 0.11598 0.04985 0.03022
## Proportion of Variance 0.01073 0.00579 0.00313 0.00201 0.00084 0.00016 0.00006
## Cumulative Proportion  0.98802 0.99381 0.99694 0.99895 0.99979 0.99994 1.00000
##                            PC15     PC16
## Standard deviation     0.004415 0.001579
## Proportion of Variance 0.000000 0.000000
## Cumulative Proportion  1.000000 1.000000
#Now I need to make the plot with the env variables
# Site scores (one row per sample) and variable loadings from the PCA.
env_prcomp_grp_remST_scores <- as.data.frame(scores(env_prcomp_grp_remST))
# Loadings are multiplied by 7 before plotting (presumably so the arrows are
# legible next to the site scores).
env_prcomp_grp_remST_species <- as.data.frame(scores(env_prcomp_grp_remST, display = "species")) * 7
# Grouping columns come from the full environmental table.
# NOTE(review): assumes its rows are in the same order as the PCA input - confirm.
env_prcomp_grp_remST_scores$depth <- ant_community_final_calc_env_grp_remST$depth
env_prcomp_grp_remST_scores$Group <- ant_community_final_calc_env_grp_remST$Group

# Percent of variance per component, computed from the fit so the axis titles
# cannot go stale when the PCA inputs change (previously typed in by hand).
env_prcomp_grp_remST_pct <- 100 * env_prcomp_grp_remST$sdev^2 / sum(env_prcomp_grp_remST$sdev^2)

environmental_PCA_plot <- ggplot(data = env_prcomp_grp_remST_scores, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = Group, shape = depth)) +
  # loading arrows and their labels
  geom_segment(data = env_prcomp_grp_remST_species, aes(x = 0, y = 0, xend = PC1, yend = PC2), color = "black") +
  geom_text_repel(data = env_prcomp_grp_remST_species, aes(x = PC1, y = PC2), label = rownames(env_prcomp_grp_remST_species), color = "black", fontface = "bold", size = 3) +
  # sample labels, nudged slightly above their points
  geom_text_repel(aes(x = PC1, y = PC2 + 0.1), size = 3, label = rownames(env_prcomp_grp_remST_scores)) +
  labs(shape = "Depth") +
  xlab(sprintf("PC1 (%.2f%%)", env_prcomp_grp_remST_pct[1])) +
  ylab(sprintf("PC2 (%.2f%%)", env_prcomp_grp_remST_pct[2]))

environmental_PCA_plot

ggsave("environmental_PCA.png", plot = environmental_PCA_plot, path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/")
## Saving 7 x 5 in image

I have done all of the main analyses I can do on the functional patterns (PP, hetero, and whole community), as well as bacterivorous activity rates (% standing stock removed per day).

I want to test the correlation of all the variables to one another.

# One-off pairwise check: mixotroph grazing rate against latitude.
correl_test <- cor.test(
  ant_community_final_calc_grp_remStations$MNAN_SS_perday,
  ant_community_final_calc_grp_remStations$latitude
)

# Empty correlation matrix and matching p-value matrix.
# Both are square, cover every column except the first three, and use the
# same names on rows and columns.
cor_rownames <- colnames(ant_community_final_calc_grp_remStations)[-(1:3)]
cor_colnames <- colnames(ant_community_final_calc_grp_remStations)[-(1:3)]
cor_matrix <- matrix(
  nrow = ncol(ant_community_final_calc_grp_remStations) - 3,
  ncol = ncol(ant_community_final_calc_grp_remStations) - 3,
  dimnames = list(cor_rownames, cor_colnames)
)
cor_p_matrix <- matrix(
  nrow = ncol(ant_community_final_calc_grp_remStations) - 3,
  ncol = ncol(ant_community_final_calc_grp_remStations) - 3,
  dimnames = list(cor_rownames, cor_colnames)
)

#used to debug my code
#for(i in colnames(cor_matrix)){
#  temp_list <- ant_community_final_calc_grp_remStations[,i]
#  print(temp_list)
#}
#str(temp_list)
#str(unlist(ant_community_final_calc_grp_remStations[,"Lat_deg"]))

#str(ant_community_final_calc_grp_remStations[,-(1:3)])
# Fill cor_matrix / cor_p_matrix with the cor.test() estimate and p-value for
# every pair of variables.
# Each column is coerced to numeric once up front; previously the same
# coercion was repeated inside the inner loop for every (i, j) pair.
cor_source_names <- union(colnames(cor_matrix), rownames(cor_matrix))
cor_numeric_columns <- lapply(
  setNames(cor_source_names, cor_source_names),
  function(var_name) {
    # `[, name]` on a tibble returns a one-column table, hence unlist()
    as.numeric(unlist(ant_community_final_calc_grp_remStations[, var_name]))
  }
)

for (i in colnames(cor_matrix)) {
  temp_i_vector <- cor_numeric_columns[[i]]
  for (j in rownames(cor_matrix)) {
    temp_j_vector <- cor_numeric_columns[[j]]
    temp_cor <- cor.test(temp_i_vector, temp_j_vector)
    # rows are indexed by j and columns by i in both matrices
    cor_matrix[j, i] <- temp_cor[["estimate"]]
    cor_p_matrix[j, i] <- temp_cor[["p.value"]]
  }
}

# Long format (columns X1, X2, value) for ggplot.
cor_matrix_melt <- melt(cor_matrix)
cor_p_matrix_melt <- melt(cor_p_matrix)

# Overview heat map of every pairwise correlation.
ggplot(data = cor_matrix_melt, mapping = aes(x = X1, y = X2)) +
  geom_tile(mapping = aes(fill = value)) +
  scale_fill_gradient2()

# Same matrix through heatmap.2, with no dendrogram and no trace lines.
heatmap.2(
  cor_matrix,
  dendrogram = "none",
  col = redblue(200),
  trace = "none",
  tracecol = "black"
)

#Correlation of important response variables to all other variables
# The response variables are listed once so the estimate subset and the
# p-value subset cannot drift apart.
cor_response_vars <- c("bprod", "Bact_mL", "MNAN_SS_perday", "HNAN_SS_perday", "ANAN_mL", "PNAN_mL", "MNAN_mL", "HNAN_mL")
cor_matrix_melt_filter <- melt(cor_matrix[cor_response_vars, ])
cor_p_matrix_melt_filter <- melt(cor_p_matrix[cor_response_vars, ])

# Tiles are coloured by the correlation estimate; the printed number is the p-value.
ggplot(cor_matrix_melt_filter, aes(X1, X2)) +
  geom_tile(aes(fill = value)) +
  scale_fill_gradient2() +
  theme(axis.text.y = element_text(size = 5), axis.text.x = element_text(size = 8)) +
  geom_text(data = cor_p_matrix_melt_filter, label = round(cor_p_matrix_melt_filter[, 3], 3), size = 1.9)

#adding pvalue to the melted data to test of another way to show my data
cor_matrix_melt_filter$pvalue <- cor_p_matrix_melt_filter$value

# Same heat map, with a black outline on tiles whose p-value is below 0.05.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be reassigned.
ggplot(cor_matrix_melt_filter, aes(X1, X2)) +
  geom_tile(aes(fill = value, color = pvalue<0.05), lwd = 0.5, height = 0.75, width = 0.95) +
  scale_color_manual(name = 'pvalue<0.05', values = setNames(c('black','White'), c(TRUE, FALSE))) +
  scale_fill_gradient2() +
  theme(axis.text.y = element_text(size = 5), axis.text.x = element_text(size = 8)) +
  geom_text(label = round(cor_matrix_melt_filter$pvalue, 3), size = 1.5)

#Cocorrelation of only the env variables to one another
#The previous matrix had all columns done at once so I should be able to subset the rows and columns for just the env variables.
#Correlation of env vars to env vars to see which vars to cocorrelates to remove
# Rows are now selected through rownames() and columns through colnames()
# (previously swapped; harmless only because both matrices carry identical
# names on both dimensions).
env_filter_names <- colnames(ant_community_final_calc_env_grp_remST)

cor_martrix_env_filter <- melt(
  cor_matrix[
    rownames(cor_matrix) %in% env_filter_names,
    colnames(cor_matrix) %in% env_filter_names
  ]
)

cor_p_martrix_env_filter <- melt(
  cor_p_matrix[
    rownames(cor_p_matrix) %in% env_filter_names,
    colnames(cor_p_matrix) %in% env_filter_names
  ]
)

#adding Pvalues to the melted data
cor_martrix_env_filter$pvalue <- cor_p_martrix_env_filter$value

#Plotting env var to env var correlation
# Fill = correlation estimate; black outline = p-value below 0.05.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be reassigned.
env_to_env_correlationmatrix <- ggplot(cor_martrix_env_filter, aes(X1, X2)) +
  geom_tile(aes(fill = value, color = pvalue<0.05), lwd = 0.5, height = 0.75, width = 0.95) +
  scale_color_manual(name = 'pvalue<0.05', values = setNames(c('black','White'), c(TRUE, FALSE))) +
  scale_fill_gradient2() +
  theme(axis.text.y = element_text(size = 5), axis.text.x = element_text(size = 5)) +
  scale_x_discrete(guide = guide_axis(angle = 90)) +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank())

env_to_env_correlationmatrix

ggsave("env_to_env_correlationmatrix.png", plot = env_to_env_correlationmatrix, path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/")
## Saving 7 x 5 in image
#Need to make a cleaner plot for publication readiness
#Going to plot response variables vs. non-redundant env vars
#ant_community_final_calc_env_grp_remST_remREDUND
#making a list of colnames I want for this:
#with depth_m: c(4:6,8:13,15,18:24)
#without depth_m: c(4:6,8:13,15:24)
#So I actually want everything BUT station letter in here after looking and contemplating how I want to structure the nature of my talk.
#Group and depth are categorical variables and wouldn't show in a correlation analysis. Will mention p-values for group and depth on slides and mention why we chose to use it as a categorical variable as opposed to a continuous variable.

# Names of the non-redundant environmental variables: every column of the
# table except columns 2 and 7.
ant_community_final_calc_env_grp_remST_remREDUND_colnames <- colnames(ant_community_final_calc_env_grp_remST_remREDUND[,-c(2,7)])

# NOTE(review): positions 24 and 25 are written by fixed index; whether this
# appends or overwrites depends on how many names the vector already holds.
ant_community_final_calc_env_grp_remST_remREDUND_colnames[24] <- "ZE"
#janky fix but I'm tired and this will work
ant_community_final_calc_env_grp_remST_remREDUND_colnames[25] <- "ZML"

# Rows = environmental variables, columns = response variables.
# Each matrix is indexed through its own rownames(); the p-value subset was
# previously indexed through the estimate matrix's column names.
final_filter_response_vars <- c("bprod", "Bact_mL", "MNAN_SS_perday", "HNAN_SS_perday", "ANAN_mL", "PNAN_mL", "MNAN_mL", "HNAN_mL")

cor_martrix_final_filter <- melt(
  cor_matrix[
    rownames(cor_matrix) %in% ant_community_final_calc_env_grp_remST_remREDUND_colnames,
    final_filter_response_vars
  ]
)

cor_p_martrix_final_filter <- melt(
  cor_p_matrix[
    rownames(cor_p_matrix) %in% ant_community_final_calc_env_grp_remST_remREDUND_colnames,
    final_filter_response_vars
  ]
)

#adding Pvalues to the melted data
cor_martrix_final_filter$pvalue <- cor_p_martrix_final_filter$value

# Fill and printed number = correlation estimate; black outline = p-value below 0.05.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be reassigned.
ggplot(cor_martrix_final_filter, aes(X1, X2)) +
  geom_tile(aes(fill = value, color = pvalue<0.05), lwd = 0.5, height = 0.75, width = 0.95) +
  scale_color_manual(name = 'pvalue<0.05', values = setNames(c('black','White'), c(TRUE, FALSE))) +
  scale_fill_gradient2() +
  theme(axis.text.y = element_text(size = 5), axis.text.x = element_text(size = 5)) +
  geom_text(label = round(cor_martrix_final_filter$value, 3), size = 1.5) +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank()) +
  coord_flip()

#Redo the env correlation matrix so it has all of the proper env vars used in the models, with most of the redundancies removed, to make it paper presentable.
#cor_martrix_env_filter_remREDUND <- melt(cor_matrix[which(colnames(cor_matrix) %in% ant_community_final_calc_env_grp_remST_remREDUND_colnames), which(rownames(cor_matrix) %in% ant_community_final_calc_env_grp_remST_remREDUND_colnames)])

#cor_p_martrix_env_filter_remREDUND <- melt(cor_p_matrix[which(colnames(cor_p_matrix) %in% ant_community_final_calc_env_grp_remST_remREDUND_colnames), which(rownames(cor_p_matrix) %in% ant_community_final_calc_env_grp_remST_remREDUND_colnames)])

#Depth was missing from the correlation since we added it back into the paper. I will readd depth into the vector of env var names and rerun the correlation plot.
# "depth_m" is appended rather than written to position 25: that position
# already holds "ZML", so assigning to it replaced mixing depth with depth
# and silently dropped ZML from this matrix. union() also avoids a duplicate
# entry if "depth_m" is already present.
ant_community_final_calc_env_grp_remST_remREDUND_colnames_wDepthm <- union(
  ant_community_final_calc_env_grp_remST_remREDUND_colnames,
  "depth_m"
)

# Square env-by-env subsets of the estimate and p-value matrices.
cor_martrix_env_filter_remREDUND <- melt(
  cor_matrix[
    rownames(cor_matrix) %in% ant_community_final_calc_env_grp_remST_remREDUND_colnames_wDepthm,
    colnames(cor_matrix) %in% ant_community_final_calc_env_grp_remST_remREDUND_colnames_wDepthm
  ]
)

cor_p_martrix_env_filter_remREDUND <- melt(
  cor_p_matrix[
    rownames(cor_p_matrix) %in% ant_community_final_calc_env_grp_remST_remREDUND_colnames_wDepthm,
    colnames(cor_p_matrix) %in% ant_community_final_calc_env_grp_remST_remREDUND_colnames_wDepthm
  ]
)
#adding Pvalues to the melted data
cor_martrix_env_filter_remREDUND$pvalue <- cor_p_martrix_env_filter_remREDUND$value

#Plotting env var to env var correlation
# Fill and printed number = correlation estimate; black outline = p-value below 0.05.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be reassigned.
env_to_env_correlationmatrix_remREDUND <- ggplot(cor_martrix_env_filter_remREDUND, aes(X1, X2)) +
  geom_tile(aes(fill = value, color = pvalue<0.05), lwd = 0.5, height = 0.75, width = 0.95) +
  scale_color_manual(name = 'pvalue<0.05', values = setNames(c('black','White'), c(TRUE, FALSE))) +
  scale_fill_gradient2() +
  theme(axis.text.y = element_text(size = 5), axis.text.x = element_text(size = 5)) +
  scale_x_discrete(guide = guide_axis(angle = 90)) +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank()) +
  geom_text(label = round(cor_martrix_env_filter_remREDUND$value, 3), size = 1.5)
env_to_env_correlationmatrix_remREDUND

ggsave("env_to_env_correlationmatrix_remREDUND.pdf", plot = env_to_env_correlationmatrix_remREDUND, path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/")
## Saving 7 x 5 in image

NEED TO ADD bactml or bprod to plot

#Summary of correlation table: -u_sphere per ml had no correlation with any measured response variables. -Increases in mixo activity and abundance correlate positively with HNAN activity but not abundance. -Mixo abundance and activity are the only responses not correlated with mixing depth -Total DIN correlated with mixo activity on top of ANAN/PNAN/MNAN abundance -ONLY HNAN abundance was not correlated with time - same thing for latitude (need to account for this or explain it while writing the paper) -Salinity impacts PNAN/ANAN abundance and HNAN activity -PO4 did not correlate with any responses and is not a limiting factor in this environment -PAR levels impacted MNAN abundance and were just short of significance for activity -O2 obviously correlates strongly with PNAN/ANANs -Nitrates/nitrites correlate with PNAN/ANAN abundance, NOT MNAN or HNAN abundance, but they do correlate with MNAN/HNAN activity - boost prey abundance maybe? -Again, ammonium correlates with HNAN/MNAN activity and MNAN abundance only -Latitude: HNANs are the only group not associated with latitude -MNAN/HNAN both correlate with depth BUT not their activity -bprod correlates with HNAN activity -Ice correlates with everything NOT mixo related

BACTERIA production -correlated with ice/NO2_NO3/PO4/mixing depth/O2/longitude/latitude/fluorescence/beamtrans/conductivity -bacteria is the only one correlated with PO4

#Initial Interpretation of correlation table Bacteria: Reduced Ice cover increases bacterial production

HNAN: Abundance is not impacted by latitude but their activity is….the further south the more feeding and feeding per cell. This may be due to increased bacterial production at the southern stations which is also associated by increased ice cover. In addition, there seems to be a significant correlation for depth and abundance but not activity. This could be due to settling out of organisms to a deeper depth and not actually indicative of growth at the deeper depths and due to no significant differences in bacterial production between the depths.

MNAN:

Mixo has no correlation with depth but a significant one with latitude. This is likely driven by environmental changes in NH4 and NO3/NO2 (need to check environmental correlations here). It is only slightly off significance with light; if I pare down the number of variables this may change. There is no correlation with bprod for mixos, thus no correlation with ice. The only significant env variable is nitrogen, but I think it’s likely light and N, since light is significant for abundance. The decoupling from bprod suggests an overall lack of predator-prey interactions with mixos, and rather that they feed to supplement phototrophic growth.

PNAN:

NEED TO DO READING ON IMPORTANCE OF MIXING DEPTH. Everything you would expect of PNANs basically occurred. Not correlated with PAR, something we found with MNAN activity. May be due to lack of difference between surface and DCM depths. PAR would be strongly correlated with depth, and the lack of a true DCM at most stations would explain the lack of a correlation with PAR. This study looks at a time prior to a bloom and thus the depth signal is lost. As we can see, there is a confounding factor of time in the dataset: as time passed there was an increase in PNANs and MNANs. There is a significant inverse relationship with ice, which suggests either an increase in light or a release of organisms stuck in the ice for growth. There is a significant correlation with bprod, suggesting the same environmental conditions promote bacterial production and primary production; potentially a portion of that PP is going towards BP.

To attempt to estimate MNAN impact on PP measured at each station, I will look at the variance of PP and the amount explained by each group (i.e. their contribution to system PP). However, PP was not measured at all depths or stations. I will be testing 2 approaches.

#Station A and all bucket samples do not have PP data. 2 options here: Impute missing data or remove samples.
#Need to create imputed dataset. Using centroid imputation, specifically mean as its the easiest for diagnostic purposes.

# Mean of the values that parse as numbers. as.numeric() turns entries that
# are not numbers into NA (hence the coercion warning that follows), and
# na.rm = TRUE leaves those out of the mean.
Par_20_mean <- mean(as.numeric(ant_community_final_calc$Pprod_PAR_20), na.rm = TRUE)
## Warning in mean(as.numeric(ant_community_final_calc$Pprod_PAR_20), na.rm =
## TRUE): NAs introduced by coercion
#Sun_20_mean <- mean(as.numeric(ant_community_final_calc$pprod_Sun_20), na.rm = TRUE)

#extract rows with NAs to replace
# Printed only for inspection; the same expression is recomputed in the
# assignment below, which is why the coercion warning appears again.
which(is.na(as.numeric(ant_community_final_calc$Pprod_PAR_20)))
## Warning in which(is.na(as.numeric(ant_community_final_calc$Pprod_PAR_20))): NAs
## introduced by coercion
##  [1]  1  2  3  4  5 12 13 14 63 64 65 78 79 80
#using a mirrored dataset to test replacing imputations without impacting the original dataframe used in the rest of analyses.
ant_community_calc_forImpute <- ant_community_final_calc

#replace NA's
# Row positions come from the original table; the copy has identical rows, so
# they line up.
# NOTE(review): Pprod_PAR_20 looks like a character column (it needs
# as.numeric() and is compared to "nd" further down), so the numeric mean is
# presumably stored as text here - confirm, and convert the column before
# using the imputed table numerically.
ant_community_calc_forImpute[which(is.na(as.numeric(ant_community_final_calc$Pprod_PAR_20))),'Pprod_PAR_20'] <- Par_20_mean
## Warning in which(is.na(as.numeric(ant_community_final_calc$Pprod_PAR_20))): NAs
## introduced by coercion
#ant_community_calc_forImpute[which(is.na(as.numeric(ant_community_final_calc$pprod_Sun_20))),'Pprod_PAR_20'] <- Sun_20_mean

#NO longer doing SUN 20 since there are too many unmeasured stations. May use a scalar later to infer potential PP at each site...

# Removal option: drop every row whose Pprod_PAR_20 holds the "nd" placeholder.
ant_community_final_calc_PP_remSites <- filter(
  ant_community_final_calc,
  Pprod_PAR_20 != "nd"
)

#now to do a PCA analysis on both grouped and non-grouped datasets
#Non-grouped dataset
#unsure if its best to use all 3 variables in a single analysis to look at correlation of the 3 variables. May also test this as well.

# Abundance columns (ANAN_mL through HNAN_mL) of the PP-measured samples.
ant_community_final_functional_PP_remSites <- select(
  ant_community_final_calc_PP_remSites,
  ANAN_mL:HNAN_mL
)

# Bacterivory columns (MNAN_SS_perday through HNAN_SS_perday), without station.
ant_community_final_calc_bacterivory_PP_remSites <- select(
  ant_community_final_calc_PP_remSites,
  MNAN_SS_perday:HNAN_SS_perday,
  -station
)

# Environmental table: everything left after removing the column ranges below.
ant_community_final_calc_env_PP_remSites <- select(
  ant_community_final_calc_PP_remSites,
  -(Long_deg:Long),
  -(waterT2:conductivity2),
  -oxygen2,
  -PAR2,
  -salinity2,
  -(pprod_Sun:pprod_Sun_20),
  -(Time_feeding:HNAN_mL),
  -(usphere_mL:HNAN_SS_percent)
)

# Columns 1 and 3 (ANAN_mL and MNAN_mL) are the ones used in the PP analysis.
ant_community_final_functional_PP_remSites[,c(1,3)]
##       ANAN_mL    MNAN_mL
## C_1  986.6928  17.582576
## C_2 1037.2242  18.838474
## C_3 1442.8055   0.000000
## C_4  616.6830  22.624638
## C_5 1787.2172  68.261766
## C_6 1870.9930  26.373864
## E_4 1582.4319  65.158959
## E_5 2066.4698  83.775804
## E_6 1628.9740  37.233691
## E_7 1312.4876   0.000000
## E_8 1117.0107   9.049855
## E_9 2094.3951  52.747729
## G_1 1096.0668  27.925268
## G_2 1089.0855  48.869219
## G_3  795.8701  27.925268
## G_4  759.2182  95.023482
## G_5 1058.8331 149.969032
## G_6 1186.8239  82.224400
## H_1 2224.7130  92.049958
## H_2 2503.9657 101.358380
## H_3 2932.1531  26.890999
## H_4 2164.2083  90.498554
## H_5 2012.9464   9.049855
## H_6 4132.9397 136.523533
## J_1 2495.8208  87.266463
## J_2 2897.2466 122.173048
## J_3 2635.4472  52.359878
## J_4 4007.2760 138.074936
## J_5 4747.2956 124.112302
## J_6 5571.0910 124.112302
## K_1 3291.1923  77.570189
## K_2 2234.0214  32.967330
## K_3 2460.9142  85.327208
## K_4 2284.7947 225.658732
## K_5 2373.6478  73.339088
## K_6 2583.0873  94.635631
## L_1 3853.6870 125.663706
## L_2 3015.9289 125.663706
## L_3 2471.3862  41.887902
## L_4 3490.6585  48.869219
## L_5 3476.6959 132.645023
## L_6 2038.5446   0.000000
## M_1 3298.6723   0.000000
## M_2 2967.0597  26.179939
## M_3 3124.1394  43.633231
## M_4 1688.2094  44.426563
## M_5 2527.2368 132.645023
## M_6 1982.6940 216.420827
## N_1 5393.0674 357.792497
## N_2 4939.2818 200.712864
## N_3 4345.8698 130.899694
## N_4 5599.0162 495.673508
## N_5 5291.8383 383.972435
## N_6 5026.5482 383.972435
## O_4 2967.0597 183.259571
## O_5 2844.8867 200.712864
## O_6 2949.6064  78.539816
## O_7 3057.8168 321.140582
## O_8 3155.5553 404.916386
## O_9 2624.9752 237.364778
## P_1 1703.4414   0.000000
## P_2 2303.8346   0.000000
## P_3 1815.1424   0.000000
## P_4 1843.0677   0.000000
## P_5 3029.8916   0.000000
## P_6 2108.3577   0.000000
## Q_4 3804.8178   0.000000
## Q_5 4188.7902   0.000000
## Q_6 4223.6968   0.000000
## Q_7 4831.0714   0.000000
## Q_8 4789.1835 146.607657
## Q_9 5235.9878   6.981317
# Pprod_PAR_20 is converted to numeric in the environmental table before it
# is handed to envfit().
ant_community_final_calc_env_PP_remSites[["Pprod_PAR_20"]] <- as.numeric(
  ant_community_final_calc_env_PP_remSites[["Pprod_PAR_20"]]
)


# Centred, scaled PCA on columns 1 and 3 of the abundance table.
PP_nonimpute_prcomp <- prcomp(
  ant_community_final_functional_PP_remSites[, c(1, 3)],
  center = TRUE,
  scale. = TRUE
)

# Fit every column of the environmental table onto the ordination.
PP_pca_fit <- envfit(
  PP_nonimpute_prcomp ~ .,
  ant_community_final_calc_env_PP_remSites,
  perm = 999
)
#autoplot(PP_pca_fit, geom = "label_repel")

# Quick-look biplot: labelled samples coloured by Group, with labelled loadings.
autoplot(
  PP_nonimpute_prcomp,
  data = as.data.frame(ant_community_final_calc_PP_remSites),
  label = TRUE,
  label.size = 2.5,
  loadings.label = TRUE,
  loadings.label.size = 3,
  loadings.label.colour = "black",
  colour = 'Group',
  label.repel = TRUE
)

#Trying to get envfit onto a PCA plot
#first make a dataframe with scores
PP_pca_scores <- as.data.frame(scores(PP_nonimpute_prcomp))
PP_pca_scores_species <- as.data.frame(scores(PP_nonimpute_prcomp, display = "species"))
#add variables to group by
PP_pca_scores$depth <- ant_community_final_calc_PP_remSites$depth
PP_pca_scores$Group <- ant_community_final_calc_PP_remSites$Group
#Now extract the scores of the envfit() variables "vector" for continuous variables and "factor" for categorical variables. Vegan uses a scalar and ordiArrowMul() accounts for that
# NOTE(review): an earlier note here said ordiArrowMul() raised an error and
# was being left out, but the call is in use - confirm it now runs cleanly.
PP_nonimpute_envfit_scores <- as.data.frame(scores(PP_pca_fit, "vectors")) * ordiArrowMul(PP_pca_fit)

# Only row 28 of the envfit vectors is drawn. It is pulled into a one-row data
# frame so the arrow and its label are drawn once; passing the whole table as
# layer data with constant aesthetics drew one identical copy per row.
# NOTE(review): row 28 is selected by position - confirm it is still the
# intended variable if the environmental table changes.
PP_envfit_arrow <- data.frame(
  xend = PP_nonimpute_envfit_scores[28, 1],
  yend = PP_nonimpute_envfit_scores[28, 2],
  label = rownames(PP_nonimpute_envfit_scores)[28]
)

ggplot(data = PP_pca_scores, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = Group)) +
  # environmental vector and its label (label nudged just below the arrow tip)
  geom_segment(data = PP_envfit_arrow, aes(x = 0, y = 0, xend = xend, yend = yend)) +
  geom_text(data = PP_envfit_arrow, aes(x = xend, y = yend - 0.04, label = label), fontface = "bold") +
  # loadings of the two abundance variables
  geom_segment(data = PP_pca_scores_species, aes(x = 0, y = 0, xend = PC1, yend = PC2), color = "red") +
  geom_text(data = PP_pca_scores_species, aes(x = PC1, y = PC2), label = rownames(PP_pca_scores_species))

#adonis vs adonis2
# They use different code to get at the same issue. adonis uses the technique straight from the literature, McArdle and Anderson (2001), while adonis2 uses modified code "based on the principles" from the same paper. Both are PERMANOVAs (permutational analyses of variance) and the proper test to calculate a possible scalar to attribute an amount of PP to each "functional" group.
# In their coding and evaluation, term order always matters for adonis: it adds each term sequentially to the model for comparison, so the variance a term explains depends on the terms before it. With adonis2 you can ignore order with the argument (by = "margin").
# NOTE(review): no `method` is passed below, so adonis2 uses its default
# dissimilarity on the raw abundances (Bray-Curtis per the vegan docs -
# confirm), unlike the scaled Euclidean distances used for model selection.

adonis2(ant_community_final_functional_PP_remSites[,c(1,3)] ~ Pprod_PAR_20, data = ant_community_final_calc_env_PP_remSites, permutations = 999)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = ant_community_final_functional_PP_remSites[, c(1, 3)] ~ Pprod_PAR_20, data = ant_community_final_calc_env_PP_remSites, permutations = 999)
##              Df SumOfSqs      R2      F Pr(>F)    
## Pprod_PAR_20  1   1.3286 0.32949 34.399  0.001 ***
## Residual     70   2.7036 0.67051                  
## Total        71   4.0322 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#PP Notes: When doing the PP analysis I noticed some incongruities. Mostly, THERE ARE NO PP surface measurements for our size fraction...

After some discussion with JD, I’m going to use PAR_20 measured for all stations and look at PP analysis with that data, ignoring SUN measurements. Also, WHY were PAR and SUN measured for both surface and DCM samples….2x as much work for no reason.

PP measurements are colinear with each other and chlA and flr measures. Fractionated PP is not, suggesting the larger portion of the community (large diatoms account for most of the mass). Will still look to see MNAN influence on “potential” PP as we can’t be certain at the accuracy of the PP measures, unless we get a clear description from Wade’s group.

This should conclude all of the potential analysis I can do at this point. Or just off shoots of the same analysis until we get more complete data.

Then off to rutgers and andrews data!

I need to build multiple models to attempt to estimate the amount of primary production attributed to mixotrophy in situ.

  1. A linear model using sum squares ratio (amount of variance explained) to portion out the amount of PP contributed by each group.
  2. A PCA or multivariate regression, using the amount of variance explained by raw PP measured values on the abundance of MNANs and PNANs to then portion out the amount of PP contribute by each group.
  3. Attempt to use a model similar to those estimating the amount of PP being contributed via satellite remote sensing data. Using Chlorophyll a, PAR, PvsI curves to calculate the amount of PP done by each group.
  4. Similar to model 3, however, using the raw abundance numbers to estimate the chlorophyll of each group and using it in the calculation.
  5. Convert the PP values we have to a rate per pg of chlorophyll and then calculating the amount of PP.

The final 3 models will take some literature review.

# New table holding PP plus the scalars used to split PP between groups:
# sample identifiers, depth information, chlorophyll, measured PP, the two
# abundance columns, and both PAR columns.
PP_data_frame <- select(
  ant_community_final_calc_PP_remSites,
  sample:Group,
  station,
  depth,
  depth_m,
  Chla,
  Pprod_PAR_20,
  PNAN_mL,
  MNAN_mL,
  PAR1,
  PAR2
)

# Quick look at the span of both PAR columns in the PP subset; the values
# are only printed, nothing is stored.
min(PP_data_frame$PAR1)
## [1] 0.40068
min(PP_data_frame$PAR2)
## [1] 0.67672
max(PP_data_frame$PAR1)
## [1] 622.9
max(PP_data_frame$PAR2)
## [1] 1868.7
#Pprod_PAR_20 is not a numeric from the problems with "nds" in the inital dataset.
# The conversion has to happen BEFORE the model is fitted: with a character
# response, lm() does not regress on the measured values. Previously this
# line came after the lm()/aov() calls.
PP_data_frame$Pprod_PAR_20 <- as.numeric(PP_data_frame$Pprod_PAR_20)

# Linear model of measured PP on the two phototroph abundances.
PP_lm <- lm(Pprod_PAR_20 ~ PNAN_mL + MNAN_mL, data = PP_data_frame)

# Sequential ANOVA table; the "Sum Sq" entries follow the term order of the
# formula: [1] PNAN_mL, [2] MNAN_mL.
PP_aov <- summary(aov(PP_lm))


# Method 2 scalar: sum of squares for MNAN_mL divided by that for PNAN_mL.
# It is a single number, recycled into every row.
PP_data_frame$Method2_scalar <- PP_aov[[1]][["Sum Sq"]][2]/PP_aov[[1]][["Sum Sq"]][1]

# Method 1: split measured PP by the per-sample abundance ratio MNAN_mL / PNAN_mL.
PP_data_frame1 <- mutate(
  PP_data_frame,
  Method1_scalar = MNAN_mL / PNAN_mL,
  Method1_MNAN_PP = Pprod_PAR_20 * Method1_scalar,
  Method1_PNAN_PP = Pprod_PAR_20 - Method1_MNAN_PP
)

# Method 2: split measured PP by the Method2_scalar column already on the table.
PP_data_frame1 <- mutate(
  PP_data_frame1,
  Method2_MNAN_PP = Pprod_PAR_20 * Method2_scalar,
  Method2_PNAN_PP = Pprod_PAR_20 - Method2_MNAN_PP
)

#I need to finish adding the final method. I need to P vs I curves to properly calculate everything...all other variables are added into the dataframe.

#Initial notes: there's <5% difference between the 2 methods.

Time to visualize the difference (if any) between the models

# All four panels use the same table, so they start from one base plot.
PP_boxplot_base <- ggplot(PP_data_frame1)

#Method 1
# MNAN share of PP per station, split by depth (fill only)
PP_boxplot_base +
  geom_boxplot(mapping = aes(x = station, y = Method1_MNAN_PP, fill = depth))

# PNAN share of PP per station
PP_boxplot_base +
  geom_boxplot(mapping = aes(x = station, y = Method1_PNAN_PP, fill = depth, color = depth))

#Method 2
# MNAN share of PP per station
PP_boxplot_base +
  geom_boxplot(mapping = aes(x = station, y = Method2_MNAN_PP, fill = depth, color = depth))

# PNAN share of PP per station
PP_boxplot_base +
  geom_boxplot(mapping = aes(x = station, y = Method2_PNAN_PP, fill = depth, color = depth))

# Load the cruise chlorophyll and primary-production tables (paths are
# relative to the working directory).
# NOTE(review): the summaries printed further down show -999 as the minimum of
# several numeric columns - presumably a missing-value code; confirm before
# using these columns in calculations.
chlA_cruise <- read.csv(file = "LTER_PP_data/Chlorophyll_Cruise.csv")
C_fixed_cruise <- read.csv(file = "LTER_PP_data/Primary Production_Cruise.csv")

str(object = chlA_cruise)
## 'data.frame':    24600 obs. of  18 variables:
##  $ studyName           : chr  "PD91-09" "PD91-09" "PD91-09" "PD91-09" ...
##  $ Event               : chr  "-999" "-999" "-999" "-999" ...
##  $ Cast.Number         : chr  "" "" "" "" ...
##  $ Bottle              : chr  "" "" "" "" ...
##  $ Depth..m.           : chr  "0" "0" "0" "0" ...
##  $ Datetime.GMT        : chr  "11/14/91 18:56" "11/14/91 23:22" "11/16/91 0:23" "11/16/91 1:39" ...
##  $ Latitude..º.        : num  -63.9 -63.7 -63.7 -63.8 -64 ...
##  $ Longitude..º.       : num  -64.2 -64.4 -64.9 -65.5 -65.7 ...
##  $ Nominal.Station     : chr  "" "" "" "" ...
##  $ Grid.Line           : chr  "700" "700" "680" "660" ...
##  $ Grid.Station        : chr  "110" "130" "160" "160" ...
##  $ Irradiance....      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Chlorophyll..mg.m.. : num  0.206 0.172 0.187 0.252 0.266 0.223 0.28 0.259 0.438 0.446 ...
##  $ Phaeopigment..mg.m..: num  0.06 0.072 0.086 0.097 0.101 0.078 0.077 0.089 0.088 0.119 ...
##  $ Filter.Code         : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Platform            : chr  "ship" "ship" "ship" "ship" ...
##  $ Sampling.Device     : chr  "SWI - alongtrack" "SWI - alongtrack" "SWI - alongtrack" "SWI - alongtrack" ...
##  $ Notes               : chr  "" "" "NOT_filtered_when_taken.__Left_exposed_before_filtering._Filtered_at_0312" "NOT_filtered_when_taken.__Left_exposed_before_filtering._Filtered_at_0312" ...
# Column types of the primary-production cruise table.
str(C_fixed_cruise)
## 'data.frame':    7004 obs. of  14 variables:
##  $ studyName                     : chr  "LMG98-01" "LMG98-01" "LMG98-01" "LMG98-01" ...
##  $ Event                         : int  24 24 24 24 24 24 48 48 48 48 ...
##  $ Cast                          : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Bottle                        : int  11 10 8 7 6 5 12 10 9 8 ...
##  $ Datetime.GMT                  : chr  "1/29/98 20:20" "1/29/98 20:20" "1/29/98 20:20" "1/29/98 20:20" ...
##  $ Station.Name                  : chr  "" "" "" "" ...
##  $ Line                          : num  600 600 600 600 600 600 600 600 600 600 ...
##  $ Station                       : num  60 60 60 60 60 60 100 100 100 100 ...
##  $ Latitude..º.                  : num  -64.8 -64.8 -64.8 -64.8 -64.8 ...
##  $ Longitude..º.                 : num  -64.7 -64.7 -64.7 -64.7 -64.7 ...
##  $ Depth..m.                     : chr  "1" "5.8" "10.7" "17.7" ...
##  $ Primary.Production..mg.m..day.: num  2.71 18.8 26.78 20.33 10.58 ...
##  $ Prim.Prod.SD..mg.m..day.      : num  0.48 0.79 3.26 4.22 1.36 0.11 5.23 4.74 1.46 4.24 ...
##  $ Percent_Irradiance....        : num  100 50 25 10 5 1 100 50 25 10 ...
# Per-column summary of the chlorophyll cruise table.
summary(chlA_cruise)
##   studyName            Event           Cast.Number           Bottle         
##  Length:24600       Length:24600       Length:24600       Length:24600      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   Depth..m.         Datetime.GMT        Latitude..º.      Longitude..º.     
##  Length:24600       Length:24600       Min.   :-1015.65   Min.   :-1015.65  
##  Class :character   Class :character   1st Qu.:  -66.51   1st Qu.:  -69.59  
##  Mode  :character   Mode  :character   Median :  -65.23   Median :  -67.08  
##                                        Mean   :  -55.23   Mean   :  -56.87  
##                                        3rd Qu.:  -64.77   3rd Qu.:  -64.40  
##                                        Max.   :   68.98   Max.   :   76.58  
##                                        NA's   :3          NA's   :3         
##  Nominal.Station     Grid.Line         Grid.Station       Irradiance....   
##  Length:24600       Length:24600       Length:24600       Min.   :-999.00  
##  Class :character   Class :character   Class :character   1st Qu.:   1.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  11.00  
##                                                           Mean   : -53.23  
##                                                           3rd Qu.:  50.00  
##                                                           Max.   : 100.00  
##                                                           NA's   :16478    
##  Chlorophyll..mg.m.. Phaeopigment..mg.m..  Filter.Code        Platform        
##  Min.   : -999.000   Min.   : -999.000    Min.   :-999.00   Length:24600      
##  1st Qu.:    0.115   1st Qu.:    0.031    1st Qu.:   1.00   Class :character  
##  Median :    0.427   Median :    0.086    Median :   1.00   Mode  :character  
##  Mean   :    1.528   Mean   :    0.062    Mean   : -30.48                     
##  3rd Qu.:    1.120   3rd Qu.:    0.201    3rd Qu.:   2.00                     
##  Max.   :13292.500   Max.   :10447.300    Max.   : 208.00                     
##  NA's   :63          NA's   :62                                               
##  Sampling.Device       Notes          
##  Length:24600       Length:24600      
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##                                       
## 
# Per-column summary of the primary-production cruise table.
summary(C_fixed_cruise)
##   studyName             Event             Cast           Bottle       
##  Length:7004        Min.   :   6.0   Min.   : 1.00   Min.   :-999.00  
##  Class :character   1st Qu.: 178.0   1st Qu.:15.00   1st Qu.:  14.00  
##  Mode  :character   Median : 350.0   Median :27.00   Median :  18.00  
##                     Mean   : 409.9   Mean   :30.15   Mean   :  15.55  
##                     3rd Qu.: 572.0   3rd Qu.:43.00   3rd Qu.:  21.50  
##                     Max.   :1273.0   Max.   :79.00   Max.   :  26.00  
##                                      NA's   :5481    NA's   :2009     
##  Datetime.GMT       Station.Name            Line           Station      
##  Length:7004        Length:7004        Min.   :-158.0   Min.   :-80.00  
##  Class :character   Class :character   1st Qu.: 200.0   1st Qu.: 38.00  
##  Mode  :character   Mode  :character   Median : 400.0   Median : 80.06  
##                                        Mean   : 382.3   Mean   : 89.41  
##                                        3rd Qu.: 593.0   3rd Qu.:159.86  
##                                        Max.   : 623.3   Max.   :260.33  
##                                        NA's   :542      NA's   :542     
##   Latitude..º.    Longitude..º.     Depth..m.        
##  Min.   :-70.09   Min.   :-78.90   Length:7004       
##  1st Qu.:-66.98   1st Qu.:-70.40   Class :character  
##  Median :-65.88   Median :-68.30   Mode  :character  
##  Mean   :-66.02   Mean   :-68.43                     
##  3rd Qu.:-64.88   3rd Qu.:-66.15                     
##  Max.   :-63.48   Max.   :-44.32                     
##                                                      
##  Primary.Production..mg.m..day. Prim.Prod.SD..mg.m..day. Percent_Irradiance....
##  Min.   :-999.000               Min.   :-999.0000        Min.   :-999.0        
##  1st Qu.:   2.978               1st Qu.:   0.1966        1st Qu.:   5.0        
##  Median :   9.889               Median :   0.7892        Median :  24.0        
##  Mean   :  29.435               Mean   : -21.7156        Mean   :  32.1        
##  3rd Qu.:  27.043               3rd Qu.:   2.9363        3rd Qu.:  50.0        
##  Max.   :2273.473               Max.   : 569.8715        Max.   : 100.0        
##                                 NA's   :4                NA's   :343
# Need to fix the dates and determine a proper ID; thinking
# paste(line, station, depth, year).

# Parse the "m/d/yy H:M" timestamps. The column is labelled GMT, so parse it in
# GMT instead of the machine's local time zone, and store it as POSIXct: a
# POSIXlt value is a list of components and is a poor fit for a data-frame column.
# Note that %y maps two-digit years 69-99 to 19xx and 00-68 to 20xx.
C_fixed_datetime_lt <- strptime(C_fixed_cruise$Datetime.GMT, "%m/%d/%y %H:%M", tz = "GMT")
C_fixed_cruise$Datetime.GMT <- as.POSIXct(C_fixed_datetime_lt)
# POSIXlt stores the year as an offset from 1900.
C_fixed_cruise$Year <- C_fixed_datetime_lt$year + 1900
head(C_fixed_cruise)
##   studyName Event Cast Bottle        Datetime.GMT Station.Name Line Station
## 1  LMG98-01    24   NA     11 1998-01-29 20:20:00               600      60
## 2  LMG98-01    24   NA     10 1998-01-29 20:20:00               600      60
## 3  LMG98-01    24   NA      8 1998-01-29 20:20:00               600      60
## 4  LMG98-01    24   NA      7 1998-01-29 20:20:00               600      60
## 5  LMG98-01    24   NA      6 1998-01-29 20:20:00               600      60
## 6  LMG98-01    24   NA      5 1998-01-29 20:20:00               600      60
##   Latitude..º. Longitude..º. Depth..m. Primary.Production..mg.m..day.
## 1     -64.8181     -64.73208         1                           2.71
## 2     -64.8181     -64.73208       5.8                          18.80
## 3     -64.8181     -64.73208      10.7                          26.78
## 4     -64.8181     -64.73208      17.7                          20.33
## 5     -64.8181     -64.73208      23.2                          10.58
## 6     -64.8181     -64.73208      41.4                           0.21
##   Prim.Prod.SD..mg.m..day. Percent_Irradiance.... Year
## 1                     0.48                    100 1998
## 2                     0.79                     50 1998
## 3                     3.26                     25 1998
## 4                     4.22                     10 1998
## 5                     1.36                      5 1998
## 6                     0.11                      1 1998
#View(C_fixed_cruise)

# Parse the "m/d/yy H:M" timestamps. The column is labelled GMT, so parse it in
# GMT instead of the machine's local time zone, and store it as POSIXct: a
# POSIXlt value is a list of components and is a poor fit for a data-frame column.
# Note that %y maps two-digit years 69-99 to 19xx and 00-68 to 20xx.
chlA_datetime_lt <- strptime(chlA_cruise$Datetime.GMT, "%m/%d/%y %H:%M", tz = "GMT")
chlA_cruise$Datetime.GMT <- as.POSIXct(chlA_datetime_lt)
# POSIXlt stores the year as an offset from 1900.
chlA_cruise$Year <- chlA_datetime_lt$year + 1900
head(chlA_cruise)
##   studyName Event Cast.Number Bottle Depth..m.        Datetime.GMT Latitude..º.
## 1   PD91-09  -999                            0 1991-11-14 18:56:00    -63.85133
## 2   PD91-09  -999                            0 1991-11-14 23:22:00    -63.73200
## 3   PD91-09  -999                            0 1991-11-16 00:23:00    -63.67833
## 4   PD91-09  -999                            0 1991-11-16 01:39:00    -63.82333
## 5   PD91-09  -999                            0 1991-11-16 02:43:00    -63.95000
## 6   PD91-09  -999                            0 1991-11-16 03:10:00    -64.02833
##   Longitude..º. Nominal.Station Grid.Line Grid.Station Irradiance....
## 1     -64.18150                       700          110             NA
## 2     -64.44417                       700          130             NA
## 3     -64.89167                       680          160             NA
## 4     -65.45500                       660          160             NA
## 5     -65.71833                       640          160             NA
## 6     -65.86000                                                    NA
##   Chlorophyll..mg.m.. Phaeopigment..mg.m.. Filter.Code Platform
## 1               0.206                0.060           1     ship
## 2               0.172                0.072           1     ship
## 3               0.187                0.086           1     ship
## 4               0.252                0.097           1     ship
## 5               0.266                0.101           1     ship
## 6               0.223                0.078           1     ship
##    Sampling.Device
## 1 SWI - alongtrack
## 2 SWI - alongtrack
## 3 SWI - alongtrack
## 4 SWI - alongtrack
## 5 SWI - alongtrack
## 6 SWI - alongtrack
##                                                                       Notes
## 1                                                                          
## 2                                                                          
## 3 NOT_filtered_when_taken.__Left_exposed_before_filtering._Filtered_at_0312
## 4 NOT_filtered_when_taken.__Left_exposed_before_filtering._Filtered_at_0312
## 5 NOT_filtered_when_taken.__Left_exposed_before_filtering._Filtered_at_0312
## 6                                                                    no_XBT
##   Year
## 1 1991
## 2 1991
## 3 1991
## 4 1991
## 5 1991
## 6 1991
#View(chlA_cruise)

# Sample ID for the chlorophyll table: grid line, grid station, depth, year and
# irradiance joined with "_" (missing values appear as the text "NA").
chlA_cruise$ID <- with(
  chlA_cruise,
  paste(Grid.Line, Grid.Station, Depth..m., Year, Irradiance...., sep = "_")
)

# Matching ID for the primary-production table, built from its equivalent columns.
C_fixed_cruise$ID <- with(
  C_fixed_cruise,
  paste(Line, Station, Depth..m., Year, Percent_Irradiance...., sep = "_")
)

#ChlA has values from prior to 1998...need to filter out all samples from prior to 1998

#C_fixed_cruise$ID[duplicated(C_fixed_cruise$ID)]

#Fixed the dates but NOW need to fix the station and line values to correspond correctly in my ID system.

#NEED to drop depth from the ID system. It relays the same info as irradiance but is terribly inconsistent between files. Using irradiance and dealing with duplicates will be easier.

#And ADD event as that is a particular identifier for the cast event.
#considering using study name instead of year.

#ID system should be: Study name, Event, Line, Station, irradiance (depth wasn't exactly the same across the datasets thus creating "duplicate" IDs)
#The last identifier I need to figure out and comb through the data to clean up any inconsistencies between the datasets.

#I will then groupby() by ID name as there are replicates for certain samples and take the means of them.

#These last 2 steps should get me to 7004 observations or rows (this will be fewer because of discrepancies in the % irradiance recorded between the 2 datasets).

#Some events don't have the same number of irradiance levels in both datasets, e.g. pd95-01 event 126:
#this event only has irradiances 100, 55, 27, 2 for chlorophyll but 100, 55, 27, 11, 5, 2 in the primary production file.
#My ID system will join only the samples for which all the info was collected and will drop IDs that aren't shared.

#Study Name Checklist:
#pd95-01 - DONE
#pd96-01 - DONE
#pd97-01 - DONE - want to capture the Marg Bay samplings - event numbers: 1096 and 1105
# and the Lemaire: 1225
#LMG98-01 - DONE
#LMG99-01 - DONE Check deep location event 899
#LMGR98-08 - unusable
#
# Reading in new datafile *NEED TO CLEAN UP CODE*------
# The path is absolute and machine-specific.
ant_community_final_PP <- read.csv(
  file = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/NBP1910_envdata_forFEED_final_newgroup_PP.csv"
)


str(object = ant_community_final_PP)
## 'data.frame':    87 obs. of  74 variables:
##  $ X                     : chr  "A_1" "A_2" "A_3" "A_4" ...
##  $ sample                : chr  "A_Surf_1" "A_Surf_2" "A_Surf_3" "A_DCM_1" ...
##  $ Rep                   : chr  "A" "B" "C" "A" ...
##  $ Group                 : chr  "Gerlache" "Gerlache" "Gerlache" "Gerlache" ...
##  $ date                  : chr  "6-Nov" "6-Nov" "6-Nov" "6-Nov" ...
##  $ Lat_deg               : int  64 64 64 64 64 64 64 64 64 64 ...
##  $ Lat_min               : num  32.4 32.4 32.4 32.4 32.4 32.4 53.7 53.7 53.7 53.7 ...
##  $ Lat                   : num  64.5 64.5 64.5 64.5 64.5 ...
##  $ Long_deg              : int  62 62 62 62 62 62 64 64 64 64 ...
##  $ Long_min              : num  22.4 22.4 22.4 22.4 22.4 22.4 12.1 12.1 12.1 12.1 ...
##  $ Long                  : num  62.4 62.4 62.4 62.4 62.4 ...
##  $ bottom                : int  740 740 740 740 740 740 799 799 799 799 ...
##  $ ice                   : num  0 0 0 0 0 0 1 1 1 1 ...
##  $ airT                  : num  -0.5 -0.5 -0.5 -0.5 -0.5 -0.5 -1.4 -1.4 -1.4 -1.4 ...
##  $ PAR0                  : chr  "nd" "nd" "nd" "nd" ...
##  $ station               : chr  "A" "A" "A" "A" ...
##  $ depth                 : chr  "Surf" "Surf" "Surf" "DCM" ...
##  $ depth_m               : num  1.4 1.4 1.4 45.2 45.2 45.2 1 1 1 30.2 ...
##  $ Chla                  : num  0.5 0.5 0.5 0.52 0.52 0.52 0.43 0.43 0.43 0.22 ...
##  $ prDM                  : int  1 1 1 46 46 46 1 1 1 30 ...
##  $ depSM                 : num  0.991 0.991 0.991 45.545 45.545 ...
##  $ ZML_T_m..0.2ºC.       : num  43.6 43.6 43.6 43.6 43.6 ...
##  $ ZML_TS_m..0.023Kg.m3. : num  24.8 24.8 24.8 24.8 24.8 ...
##  $ ZML_TSP_m..0.023Kg.m3.: num  13.9 13.9 13.9 13.9 13.9 ...
##  $ Ze_m..1..             : num  49.5 49.5 49.5 49.5 49.5 ...
##  $ ZCM_m..maxFluo.       : num  30.7 30.7 30.7 30.7 30.7 ...
##  $ waterT1               : num  -0.631 -0.631 -0.631 -0.403 -0.403 ...
##  $ conductivity1         : num  27.8 27.8 27.8 28.1 28.1 ...
##  $ waterT2               : num  -0.625 -0.625 -0.625 -0.401 -0.401 ...
##  $ conductivity2         : num  27.8 27.8 27.8 28.1 28.1 ...
##  $ oxygen1               : num  7.01 7.01 7.01 6.23 6.23 ...
##  $ oxygen2               : num  7.08 7.08 7.08 6.32 6.32 ...
##  $ fluorescence          : num  0.151 0.151 0.151 0.739 0.739 ...
##  $ beamTrans             : num  97.9 97.9 97.9 98.6 98.6 ...
##  $ PAR1                  : num  965.3 965.3 965.3 13.9 13.9 ...
##  $ PAR2                  : num  2895.8 2895.8 2895.8 41.8 41.8 ...
##  $ latitude              : num  -64.5 -64.5 -64.5 -64.5 -64.5 ...
##  $ longitude             : num  -62.4 -62.4 -62.4 -62.4 -62.4 ...
##  $ timeJ                 : num  310 310 310 310 310 ...
##  $ altM                  : num  100.9 100.9 100.9 99.5 99.5 ...
##  $ spar                  : num  920 920 920 1085 1085 ...
##  $ timeS                 : num  1088 1088 1088 835 835 ...
##  $ scan                  : int  26122 26122 26122 20045 20045 20045 58590 58590 58590 55872 ...
##  $ salinity1             : num  34 34 34 34.2 34.2 ...
##  $ salinity2             : num  34 34 34 34.2 34.2 ...
##  $ oxygenSaturation      : num  8.2 8.2 8.2 8.14 8.14 ...
##  $ nbin                  : int  72 72 72 65 65 65 38 38 38 41 ...
##  $ bprod                 : num  4.07 4.07 4.07 3.64 3.64 3.64 7.85 7.85 7.85 1.68 ...
##  $ pprod_Sun             : num  NA NA NA NA NA ...
##  $ pprod_PAR             : num  NA NA NA NA NA ...
##  $ pprod_Sun_20          : num  NA NA NA NA NA ...
##  $ Pprod_PAR_20          : num  NA NA NA NA NA NA 0.973 0.973 0.973 0.238 ...
##  $ Pprod_PAR_20_stdev    : num  NA NA NA NA NA NA 0.077 0.077 0.077 0.045 ...
##  $ NH4                   : num  1.38 1.38 1.38 1.27 1.27 1.27 1.48 1.48 1.48 1.35 ...
##  $ NO2_NO3               : num  30 30 30 30.9 30.9 ...
##  $ PO4                   : num  3.03 3.03 3.03 3.29 3.29 3.29 3.03 3.03 3.03 3.07 ...
##  $ Total_DIN             : num  31.4 31.4 31.4 32.1 32.1 ...
##  $ N_P_Ratio             : num  10.35 10.35 10.35 9.77 9.77 ...
##  $ Time_feeding          : int  30 30 30 30 30 30 30 30 30 30 ...
##  $ ANAN_mL               : num  1089 1415 586 1452 1551 ...
##  $ PNAN_mL               : num  1126 1433 614 1620 1582 ...
##  $ MNAN_mL               : num  23.27 4.65 13.96 130.32 0 ...
##  $ HNAN_mL               : num  382 316 140 261 155 ...
##  $ Bact_mL               : num  136415 136415 136415 158414 158414 ...
##  $ usphere_mL            : num  20205 20205 20205 20781 20781 ...
##  $ usphere_mixo          : num  0.665 0 0 0.604 0.427 ...
##  $ usphere_hetero        : num  0.0887 0 0 0.1294 0.0854 ...
##  $ mixo_act              : int  18160 18160 18160 6811 6811 6811 18736 18736 18736 26101 ...
##  $ mixo_maybe_act        : int  943 943 943 193 193 193 1477 1477 1477 2634 ...
##  $ phyto_act             : int  4356 4356 4356 2972 2972 2972 7154 7154 7154 11243 ...
##  $ hetero_act            : int  723 723 723 512 512 512 553 553 553 3132 ...
##  $ bprod_stdev           : num  0.28 0.28 0.28 0.14 0.14 0.14 1.09 1.09 1.09 0.48 ...
##  $ pprod_Sun_stdev       : num  NA NA NA NA NA NA 0.154 0.154 0.154 0.025 ...
##  $ pprod_PAR_stdev       : num  NA NA NA NA NA NA 0.445 0.445 0.445 0.419 ...
# Turn the grouping columns into factors; depth and Group get an explicit level
# order, and values not listed in `levels` become NA.
ant_community_final_PP[["Rep"]] <- as.factor(ant_community_final_PP[["Rep"]])
ant_community_final_PP[["station"]] <- as.factor(ant_community_final_PP[["station"]])
ant_community_final_PP[["depth"]] <- factor(
  ant_community_final_PP[["depth"]],
  levels = c("Surf", "DCM", "Bucket")
)
# NOTE(review): "Maguerite" must match the spelling used in the csv exactly -
# confirm it is not "Marguerite", otherwise those rows silently become NA.
ant_community_final_PP[["Group"]] <- factor(
  ant_community_final_PP[["Group"]],
  levels = c("Gerlache", "Palmer", "Grandidier", "Offshore", "Maguerite")
)

#Fixing column headings -----
# Explicit old -> new name mapping for the environmental variables in use.
# Each column is renamed by matching its own name, so the result does not
# depend on the order in which the columns appear in the file.
colname_map <- c(
  "waterT1" = "Water_Temp",
  "conductivity1" = "conductivity",
  "oxygen1" = "oxygen",
  "PAR1" = "PAR",
  "salinity1" = "salinity",
  "oxygenSaturation" = "oxygen_saturation",
  "ZML_TS_m..0.023Kg.m3." = "ZML",
  "timeJ" = "Time",
  "Ze_m..1.." = "ZE"
)

#Headers of env vars I'm using and need to change
colname_fix <- names(colname_map)

# Rename only the columns that are actually present, so re-running this chunk
# after the rename has already happened is a harmless no-op.
colname_idx <- match(colname_fix, colnames(ant_community_final_PP))
colname_found <- !is.na(colname_idx)
colnames(ant_community_final_PP)[colname_idx[colname_found]] <- unname(colname_map)[colname_found]

#Calculating per ML concentrations/feeding/ and PP rates -----
# Drop replicate C of station A at the DCM (keep every row where at least one
# of the three conditions fails).
# NOTE(review): the note at the top of the file mentions a missing replicate at
# station A *surface* - confirm DCM is the intended depth here.
ant_community_final_PP <- filter(
  ant_community_final_PP,
  Rep != "C" | station != "A" | depth != "DCM"
)

# Derived variables. Later expressions reuse columns created earlier in the
# same call, so the order of the arguments matters.
ant_community_final_PP_calc <- dplyr::mutate(
  ant_community_final_PP,
  # Total nanoflagellates and each group's share of that total, in percent
  TNAN = ANAN_mL+MNAN_mL+HNAN_mL,
  PNAN_percent = PNAN_mL/TNAN*100,
  MNAN_percent = MNAN_mL/TNAN*100,
  HNAN_percent = HNAN_mL/TNAN*100,
  ANAN_percent = ANAN_mL/TNAN*100,
  # Bacterivory: microsphere uptake per unit feeding time, scaled by the
  # bacteria-to-microsphere ratio (Time_feeding/60 presumably converts
  # minutes to hours - TODO confirm units)
  bac_usphere_ratio = Bact_mL/usphere_mL,
  MNAN_bacterivory = usphere_mixo/(Time_feeding/60)*bac_usphere_ratio,
  HNAN_bacterivory = usphere_hetero/(Time_feeding/60)*bac_usphere_ratio,
  # Grazing impact relative to bacterial abundance (x100), plus a x24 version
  MNAN_SS = MNAN_bacterivory/Bact_mL*MNAN_mL*100,
  HNAN_SS = HNAN_bacterivory/Bact_mL*HNAN_mL*100,
  MNAN_SS_perday = MNAN_SS*24,
  HNAN_SS_perday = HNAN_SS*24,
  Total_SS = MNAN_SS + HNAN_SS,
  MNAN_SS_percent = MNAN_SS/Total_SS*100,
  HNAN_SS_percent = HNAN_SS/Total_SS*100,
  # Share of each group in the summed *_act counts. Despite the "_percent"
  # suffix these are fractions (0-1): they are not multiplied by 100.
  Total_act = mixo_act+mixo_maybe_act+phyto_act,
  mixo_act_percent = mixo_act/Total_act,
  mixo_maybe_act_percent = mixo_maybe_act/Total_act,
  phyto_act_percent = phyto_act/Total_act,
  # Split PP (and its sd) between the groups using those fractions
  Mixo_PP = Pprod_PAR_20*mixo_act_percent,
  Mixo_maybe_PP = Pprod_PAR_20*mixo_maybe_act_percent,
  Phyto_PP = Pprod_PAR_20*phyto_act_percent,
  Mixo_PP_sd = Pprod_PAR_20_stdev*mixo_act_percent,
  Mixo_maybe_PP_sd = Pprod_PAR_20_stdev*mixo_maybe_act_percent,
  Phyto_PP_sd = Pprod_PAR_20_stdev*phyto_act_percent,
  # Upper bound for mixotroph PP: confirmed plus "maybe" fractions combined
  Mixo_PP_potential = Pprod_PAR_20 * (mixo_act_percent + mixo_maybe_act_percent),
  Mixo_PP_potential_sd = Pprod_PAR_20_stdev * (mixo_act_percent + mixo_maybe_act_percent)
)

#Instead of trying to combine the means and sd as if they were 2 different groups, I just add the percentages together prior to multiplying by the carbon fixation data. MUCH easier and cleaner. Should've just done this from the beginning....
##Mixo_PP_potential = (Mixo_PP*3+Mixo_maybe_PP*3)/6,
#                                         Mixo_PP_potential_sd = (3*(Mixo_PP_sd^2+(Mixo_PP-Mixo_PP_potential)^2)+3*(Mixo_maybe_PP_sd^2+(Mixo_maybe_PP-Mixo_PP_potential)^2))/6,
#                                         Mixo_PP_potential_sd_cov = sqrt((Mixo_PP_sd^2 + Mixo_maybe_PP_sd^2 + 2*cov(Mixo_maybe_PP, Mixo_PP, use = "complete.obs"))),
#                                         cov_test =  Mixo_PP_sd^2 + Mixo_maybe_PP_sd^2 + 2*cov(Mixo_maybe_PP, Mixo_PP, use = "complete.obs")

# PP visualizations ----------
# DCM samples only, with stations N, O, P and Q left out.
ggplot(
  filter(ant_community_final_PP_calc, !station %in% c("N", "O","P","Q"), depth == "DCM"),
  aes(x = station, y = Mixo_PP)
) +
  geom_point()
## Warning: Removed 2 rows containing missing values (`geom_point()`).

ggplot(
  filter(ant_community_final_PP_calc, !station %in% c("N", "O","P","Q"), depth == "DCM"),
  aes(x = station, y = Mixo_maybe_PP)
) +
  geom_point()
## Warning: Removed 2 rows containing missing values (`geom_point()`).

ggplot(
  filter(ant_community_final_PP_calc, !station %in% c("N", "O","P","Q"), depth == "DCM"),
  aes(x = station, y = Phyto_PP)
) +
  geom_point()
## Warning: Removed 2 rows containing missing values (`geom_point()`).

# All three groups on one panel, told apart by colour.
ggplot(
  filter(ant_community_final_PP_calc, !station %in% c("N", "O","P","Q"), depth == "DCM"),
  aes(x = station)
) +
  geom_point(aes(y = Mixo_PP, color = "Mixo")) +
  geom_point(aes(y = Mixo_maybe_PP, color = "Mixo?")) +
  geom_point(aes(y = Phyto_PP, color = "Phyto"))
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Removed 2 rows containing missing values (`geom_point()`).
## Removed 2 rows containing missing values (`geom_point()`).

#Visualizations with Surf and DCM---------
# Every depth kept, stations N, O, P and Q left out; colour marks depth.
ggplot(
  filter(ant_community_final_PP_calc, !station %in% c("N", "O","P","Q")),
  aes(x = station, y = Mixo_PP, color = depth)
) +
  geom_point()
## Warning: Removed 8 rows containing missing values (`geom_point()`).

ggplot(
  filter(ant_community_final_PP_calc, !station %in% c("N", "O","P","Q")),
  aes(x = station, y = Mixo_maybe_PP, color = depth)
) +
  geom_point()
## Warning: Removed 8 rows containing missing values (`geom_point()`).

ggplot(
  filter(ant_community_final_PP_calc, !station %in% c("N", "O","P","Q")),
  aes(x = station, y = Phyto_PP, color = depth)
) +
  geom_point()
## Warning: Removed 8 rows containing missing values (`geom_point()`).

# All three groups on one panel: colour marks the group, shape marks depth.
ggplot(
  filter(ant_community_final_PP_calc, !station %in% c("N", "O","P","Q")),
  aes(x = station, shape = depth)
) +
  geom_point(aes(y = Mixo_PP, color = "Mixo")) +
  geom_point(aes(y = Mixo_maybe_PP, color = "Mixo?")) +
  geom_point(aes(y = Phyto_PP, color = "Phyto"))
## Warning: Removed 8 rows containing missing values (`geom_point()`).
## Warning: Removed 8 rows containing missing values (`geom_point()`).
## Removed 8 rows containing missing values (`geom_point()`).

#Visualizations with all stations and depths---------
# Unfiltered table; colour marks depth.
ggplot(ant_community_final_PP_calc, aes(x = station, y = Mixo_PP, color = depth)) +
  geom_point()
## Warning: Removed 20 rows containing missing values (`geom_point()`).

ggplot(ant_community_final_PP_calc, aes(x = station, y = Mixo_maybe_PP, color = depth)) +
  geom_point()
## Warning: Removed 20 rows containing missing values (`geom_point()`).

ggplot(ant_community_final_PP_calc, aes(x = station, y = Phyto_PP, color = depth)) +
  geom_point()
## Warning: Removed 20 rows containing missing values (`geom_point()`).

# PP attributed to each functional group (colour) at each depth (shape and
# linetype), drawn as the value +/- its sd. Stations N, O, P, Q and A and the
# Bucket samples are left out.
# Points use the same position_dodge() as the error bars, so each point sits
# on the bar that describes it instead of being offset from it.
MPNAN_PP_distribution_plot <- ggplot(
  filter(ant_community_final_PP_calc, !station %in% c("N", "O","P","Q","A"), !depth == "Bucket"),
  aes(x = station)
) +
  geom_errorbar(
    aes(ymin = Mixo_PP - Mixo_PP_sd, ymax = Mixo_PP + Mixo_PP_sd,
        color = "Mixo", linetype = depth),
    position = position_dodge(0.1)
  ) +
  geom_errorbar(
    aes(ymin = Mixo_maybe_PP - Mixo_maybe_PP_sd, ymax = Mixo_maybe_PP + Mixo_maybe_PP_sd,
        color = "Mixo?", linetype = depth),
    position = position_dodge(0.1)
  ) +
  geom_errorbar(
    aes(ymin = Phyto_PP - Phyto_PP_sd, ymax = Phyto_PP + Phyto_PP_sd,
        color = "Phyto", linetype = depth),
    position = position_dodge(0.1)
  ) +
  geom_point(aes(y = Mixo_PP, color = "Mixo", shape = depth), position = position_dodge(0.1)) +
  geom_point(aes(y = Mixo_maybe_PP, color = "Mixo?", shape = depth), position = position_dodge(0.1)) +
  geom_point(aes(y = Phyto_PP, color = "Phyto", shape = depth), position = position_dodge(0.1)) +
  ylab("ugrams of C L"^-1~"day"^-1) +
  xlab("Station") +
  labs(linetype = "Depth", color = "Functional Group", shape = "Depth")

# Written to an absolute, machine-specific folder; with no width/height given,
# ggsave() uses the size of the current graphics device.
ggsave("MPNAN_PP_carbon_distribution.png", plot = MPNAN_PP_distribution_plot, path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/")
## Saving 7 x 5 in image
# Three sibling panels (mixotroph PP, potential mixotroph PP, phytoplankton PP),
# each showing value +/- sd per station with colour marking depth. Stations
# N, O, P, Q and A and the Bucket samples are left out.
# All three now share:
#   * scale_color_manual(), because only `color` is mapped. The first panel
#     used scale_fill_manual(), which had no effect, and listed the two
#     colours in the opposite order.
#     NOTE(review): palette order was aligned with the other two panels - confirm.
#   * a colour legend titled "Depth", since colour maps depth, not group.
#   * the same position_dodge() on points and error bars, so they line up.
MNAN_PP_plot <- ggplot(
  filter(ant_community_final_PP_calc, !station %in% c("N", "O","P","Q","A"), !depth == "Bucket"),
  aes(x = station, color = depth)
) +
  geom_errorbar(
    aes(ymin = Mixo_PP - Mixo_PP_sd, ymax = Mixo_PP + Mixo_PP_sd),
    position = position_dodge(0.1)
  ) +
  geom_point(aes(y = Mixo_PP), position = position_dodge(0.1)) +
  ylab("ugrams of C L"^-1~"day"^-1) +
  xlab("Station") +
  labs(color = "Depth") +
  scale_color_manual(values = c("blue3","green4")) +
  theme_bw()

MNAN_PP_potential_plot <- ggplot(
  filter(ant_community_final_PP_calc, !station %in% c("N", "O","P","Q","A"), !depth == "Bucket"),
  aes(x = station, color = depth)
) +
  geom_errorbar(
    aes(ymin = Mixo_PP_potential - Mixo_PP_potential_sd,
        ymax = Mixo_PP_potential + Mixo_PP_potential_sd),
    position = position_dodge(0.1)
  ) +
  geom_point(aes(y = Mixo_PP_potential), position = position_dodge(0.1)) +
  ylab("") +
  xlab("") +
  labs(color = "Depth") +
  scale_color_manual(values = c("blue3","green4")) +
  theme_bw()

PNAN_PP_plot <- ggplot(
  filter(ant_community_final_PP_calc, !station %in% c("N", "O","P","Q","A"), !depth == "Bucket"),
  aes(x = station, color = depth)
) +
  geom_errorbar(
    aes(ymin = Phyto_PP - Phyto_PP_sd, ymax = Phyto_PP + Phyto_PP_sd),
    position = position_dodge(0.1)
  ) +
  geom_point(aes(y = Phyto_PP), position = position_dodge(0.1)) +
  ylab("") +
  xlab("") +
  labs(color = "Depth") +
  scale_color_manual(values = c("blue3","green4")) +
  theme_bw()

#filter(ant_community_final_PP_calc, !station %in% c("N", "O","P","Q","A"), !depth == "Bucket")
# Quickly throw together a PCA analysis to see what's going on --------
# Row names (taken from column X) are set so the PCA scores carry sample
# labels; the biplot further down uses them as point labels.
rownames(ant_community_final_PP_calc) <- ant_community_final_PP_calc$X
# Same subset as the manuscript plots: no stations N, O, P, Q, A and no Bucket samples.
ant_community_final_PP_calc_PCA_filter <- filter(
  ant_community_final_PP_calc,
  !station %in% c("N", "O","P","Q","A"),
  !depth == "Bucket"
)
# Response matrix for the PCA: the columns from Mixo_PP through Phyto_PP.
ant_community_final_PP_functional <- select(ant_community_final_PP_calc_PCA_filter, Mixo_PP:Phyto_PP)

# prcomp()'s scaling argument is spelled `scale.`; it is written out in full
# here instead of relying on partial argument matching of `scale`.
Primary_Production_prcmp <- prcomp(ant_community_final_PP_functional, center = TRUE, scale. = TRUE)

#Superseded, but a quick way to plot the data
#autoplot(Primary_Production_prcmp, data = ant_community_final_PP_calc_PCA_filter, label = TRUE, label.size = 2.5, loadings.label = TRUE, loadings.label.size = 3, loadings.label.colour = "black", colour = 'Group', label.repel = TRUE)

#PCA with envfit ------
#Need to subset env data again
# Remove position columns, the duplicate "2" sensor columns, abundance/feeding
# inputs, every derived PP/grazing column and the raw PP columns, leaving the
# candidate environmental predictors.
ant_community_final_PP_functional_env <- select(
  ant_community_final_PP_calc_PCA_filter,
  -(Lat_deg:Long),
  -(waterT2:conductivity2), -oxygen2, -PAR2,
  -salinity2, -(Time_feeding:HNAN_mL),
  -(usphere_mL:Mixo_PP_potential_sd),
  -(Total_act:Phyto_PP),
  -PAR0,
  -(pprod_Sun:Pprod_PAR_20_stdev)
)

# Drop the first four remaining columns by position.
# NOTE(review): these look like the identifier columns (X, sample, Rep, Group) - confirm.
ant_community_final_PP_functional_env <- ant_community_final_PP_functional_env[, -(1:4)]

# Drop variables listed in redundant_vars_vector (defined elsewhere in the
# file). drop = FALSE keeps a data frame even if only one column is left.
ant_community_final_PP_functional_env <- ant_community_final_PP_functional_env[
  , !(colnames(ant_community_final_PP_functional_env) %in% redundant_vars_vector),
  drop = FALSE
]

# Fit every remaining variable onto the PCA ordination with 999 permutations.
# `permutations` is written out in full instead of the partial match `perm`.
ant_community_final_PP_functional_fit <- envfit(
  Primary_Production_prcmp ~ .,
  ant_community_final_PP_functional_env,
  permutations = 999
)
#Significant vars: almost all of them

#scores and species of initial PCA of functional responses
# Site scores of the functional-response PCA, and its variable ("species")
# scores multiplied by 2 so the red arrows are easier to see.
Primary_Production_prcmp_scores <- as.data.frame(
  scores(Primary_Production_prcmp)
)
Primary_Production_prcmp_species <- as.data.frame(
  scores(Primary_Production_prcmp, display = "species")
) * 2

# Carry depth and Group over from the filtered sample table for colouring.
Primary_Production_prcmp_scores$depth <- ant_community_final_PP_calc_PCA_filter$depth
Primary_Production_prcmp_scores$Group <- ant_community_final_PP_calc_PCA_filter$Group

# envfit vector scores, multiplied by 2.4, to overlay on the PCA they were fit to.
ant_community_final_PP_functional_fit_scores <- as.data.frame(
  scores(ant_community_final_PP_functional_fit, "vectors")
) * 2.4

# By Group
# Row positions of the envfit vectors shown in this figure.
# NOTE(review): the render reports 2 rows removed for missing values from both
# arrow layers, and the by-depth version of this figure uses the same list
# without 23 and 24 - presumably those two positions lie past the last fitted
# vector. Confirm before trimming them.
envfit_rows_by_group <- c(2, 5, 6, 9:11, 13:15, 19, 20, 23, 24)
envfit_arrows_by_group <- ant_community_final_PP_functional_fit_scores[envfit_rows_by_group, ]

ggplot(Primary_Production_prcmp_scores, aes(PC1, PC2)) +
  geom_point(aes(colour = Group)) +
  geom_segment(
    data = envfit_arrows_by_group,
    aes(
      x = 0,
      y = 0,
      xend = envfit_arrows_by_group[, 1],
      yend = envfit_arrows_by_group[, 2]
    )
  ) +
  geom_text_repel(
    data = envfit_arrows_by_group,
    aes(
      x = envfit_arrows_by_group[, 1] + 0.1,
      y = envfit_arrows_by_group[, 2]
    ),
    fontface = "bold",
    label = rownames(envfit_arrows_by_group),
    nudge_y = -0.03
  ) +
  geom_segment(
    data = Primary_Production_prcmp_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    colour = "red"
  ) +
  geom_text_repel(
    data = Primary_Production_prcmp_species,
    aes(x = PC1 + 0.1, y = PC2 - 0.09),
    label = rownames(Primary_Production_prcmp_species),
    colour = "red",
    fontface = "bold"
  ) +
  geom_text(
    aes(x = PC1, y = PC2 + 0.1),
    size = 3,
    label = rownames(Primary_Production_prcmp_scores)
  )
## Warning: Removed 2 rows containing missing values (`geom_segment()`).
## Warning: Removed 2 rows containing missing values (`geom_text_repel()`).

#By Depth
# Same envfit overlay as the by-Group figure, coloured by depth instead.
# Row positions of the envfit vectors shown in this figure.
envfit_rows_by_depth <- c(2, 5, 6, 9:11, 13:15, 19, 20)
envfit_arrows_by_depth <- ant_community_final_PP_functional_fit_scores[envfit_rows_by_depth, ]

ggplot(Primary_Production_prcmp_scores, aes(PC1, PC2)) +
  geom_point(aes(colour = depth)) +
  geom_segment(
    data = envfit_arrows_by_depth,
    aes(
      x = 0,
      y = 0,
      xend = envfit_arrows_by_depth[, 1],
      yend = envfit_arrows_by_depth[, 2]
    )
  ) +
  geom_text_repel(
    data = envfit_arrows_by_depth,
    aes(
      x = envfit_arrows_by_depth[, 1],
      y = envfit_arrows_by_depth[, 2] - 0.04
    ),
    fontface = "bold",
    label = rownames(envfit_arrows_by_depth)
  ) +
  geom_segment(
    data = Primary_Production_prcmp_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    colour = "red"
  ) +
  geom_text_repel(
    data = Primary_Production_prcmp_species,
    aes(x = PC1 + 0.1, y = PC2 - 0.09),
    label = rownames(Primary_Production_prcmp_species),
    colour = "red",
    fontface = "bold"
  ) +
  geom_text(
    aes(x = PC1, y = PC2 + 0.1),
    size = 3,
    label = rownames(Primary_Production_prcmp_scores)
  )

#By Group
# Functional-response PCA without envfit arrows, coloured by Group.
# Red arrows/labels are the PCA variable loadings; small black labels are
# the sample row names.
ggplot(Primary_Production_prcmp_scores, aes(PC1, PC2)) +
  geom_point(aes(colour = Group)) +
  geom_segment(
    data = Primary_Production_prcmp_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    colour = "red"
  ) +
  geom_text(
    data = Primary_Production_prcmp_species,
    aes(x = PC1 + 0.1, y = PC2 - 0.09),
    label = rownames(Primary_Production_prcmp_species),
    colour = "red",
    fontface = "bold"
  ) +
  geom_text(
    aes(x = PC1, y = PC2 + 0.1),
    size = 3,
    label = rownames(Primary_Production_prcmp_scores)
  )

# By Depth: same figure coloured by depth, with repelled loading labels.
ggplot(Primary_Production_prcmp_scores, aes(PC1, PC2)) +
  geom_point(aes(colour = depth)) +
  geom_segment(
    data = Primary_Production_prcmp_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    colour = "red"
  ) +
  geom_text_repel(
    data = Primary_Production_prcmp_species,
    aes(x = PC1 + 0.1, y = PC2 - 0.09),
    label = rownames(Primary_Production_prcmp_species),
    colour = "red",
    fontface = "bold"
  ) +
  geom_text(
    aes(x = PC1, y = PC2 + 0.1),
    size = 3,
    label = rownames(Primary_Production_prcmp_scores)
  )

#Nutrient and light only envfit------
# Reduced envfit model: only the nutrient columns and PAR.
ant_community_final_PP_functional_env_nutonly <- ant_community_final_PP_functional_env %>%
  select(NH4, NO2_NO3, PO4, N_P_Ratio, PAR)

ant_community_final_PP_functional_nutonly_fit <- envfit(
  Primary_Production_prcmp,
  ant_community_final_PP_functional_env_nutonly,
  permutations = 999
)

# Vector scores multiplied by 2.4 for plotting on top of the PCA.
ant_community_final_PP_functional_nutonly_fit_scores <- as.data.frame(
  scores(ant_community_final_PP_functional_nutonly_fit, "vectors")
) * 2.4

# Black arrows: fitted nutrient/light vectors. Red arrows: PCA loadings.
ggplot(Primary_Production_prcmp_scores, aes(PC1, PC2)) +
  geom_point(aes(colour = Group)) +
  geom_segment(
    data = ant_community_final_PP_functional_nutonly_fit_scores,
    aes(
      x = 0,
      y = 0,
      xend = ant_community_final_PP_functional_nutonly_fit_scores[, 1],
      yend = ant_community_final_PP_functional_nutonly_fit_scores[, 2]
    )
  ) +
  geom_text_repel(
    data = ant_community_final_PP_functional_nutonly_fit_scores,
    aes(
      x = ant_community_final_PP_functional_nutonly_fit_scores[, 1] + 0.1,
      y = ant_community_final_PP_functional_nutonly_fit_scores[, 2]
    ),
    fontface = "bold",
    label = rownames(ant_community_final_PP_functional_nutonly_fit_scores),
    nudge_y = -0.03
  ) +
  geom_segment(
    data = Primary_Production_prcmp_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    colour = "red"
  ) +
  geom_text_repel(
    data = Primary_Production_prcmp_species,
    aes(x = PC1 + 0.1, y = PC2 - 0.09),
    label = rownames(Primary_Production_prcmp_species),
    colour = "red",
    fontface = "bold"
  ) +
  geom_text(
    aes(x = PC1, y = PC2 + 0.1),
    size = 3,
    label = rownames(Primary_Production_prcmp_scores)
  )

#PO4, NO2_NO3, and N/P ratio are significant. Not light or ammonia.
#working off the same dataset as those for the direct previous Primary Production section
#Mixo only activity model (may need to combine mixo? and mixo for PP data here)

# Mixotroph-only response matrix: both mixotroph PP columns plus MNAN_SS_perday.
ant_community_final_PP_mixo_only_activity <- ant_community_final_PP_calc_PCA_filter %>%
  select(Mixo_PP, Mixo_maybe_PP, MNAN_SS_perday)

# PCA on centred, unit-variance columns.
Primary_Production_mixo_only_prcmp <- prcomp(
  ant_community_final_PP_mixo_only_activity,
  center = TRUE,
  scale. = TRUE
)

# Site scores, and variable scores multiplied by 2 for plotting.
Primary_Production_mixo_only_prcmp_scores <- as.data.frame(
  scores(Primary_Production_mixo_only_prcmp)
)
Primary_Production_mixo_only_prcmp_species <- as.data.frame(
  scores(Primary_Production_mixo_only_prcmp, display = "species")
) * 2
Primary_Production_mixo_only_prcmp_scores$depth <- ant_community_final_PP_calc_PCA_filter$depth
Primary_Production_mixo_only_prcmp_scores$Group <- ant_community_final_PP_calc_PCA_filter$Group

# Biplot coloured by Group; red = loadings, black labels = sample row names.
ggplot(Primary_Production_mixo_only_prcmp_scores, aes(PC1, PC2)) +
  geom_point(aes(colour = Group)) +
  geom_segment(
    data = Primary_Production_mixo_only_prcmp_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    colour = "red"
  ) +
  geom_text(
    data = Primary_Production_mixo_only_prcmp_species,
    aes(x = PC1 + 0.1, y = PC2 - 0.09),
    label = rownames(Primary_Production_mixo_only_prcmp_species),
    colour = "red",
    fontface = "bold"
  ) +
  geom_text(
    aes(x = PC1, y = PC2 + 0.1),
    size = 3,
    label = rownames(Primary_Production_mixo_only_prcmp_scores)
  )

#Need to add envfit here - finish later
# envfit of the full environmental table onto the mixotroph-only PCA
# (999 permutations).
ant_community_final_PP_functional_mixo_only_fit <- envfit(
  Primary_Production_mixo_only_prcmp ~ .,
  data = ant_community_final_PP_functional_env,
  permutations = 999
)

# Vector scores multiplied by 2.4 for plotting.
ant_community_final_PP_functional_mixo_only_fit_scores <- as.data.frame(
  scores(ant_community_final_PP_functional_mixo_only_fit, "vectors")
) * 2.4

# Biplot with PCA loadings (red) and all fitted environmental vectors (black).
ggplot(Primary_Production_mixo_only_prcmp_scores, aes(PC1, PC2)) +
  geom_point(aes(colour = Group)) +
  geom_segment(
    data = Primary_Production_mixo_only_prcmp_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    colour = "red"
  ) +
  geom_text_repel(
    data = Primary_Production_mixo_only_prcmp_species,
    aes(x = PC1 + 0.1, y = PC2 - 0.09),
    label = rownames(Primary_Production_mixo_only_prcmp_species),
    colour = "red",
    fontface = "bold"
  ) +
  geom_text(
    aes(x = PC1, y = PC2 + 0.1),
    size = 3,
    label = rownames(Primary_Production_mixo_only_prcmp_scores)
  ) +
  geom_segment(
    data = ant_community_final_PP_functional_mixo_only_fit_scores,
    aes(
      x = 0,
      y = 0,
      xend = ant_community_final_PP_functional_mixo_only_fit_scores[, 1],
      yend = ant_community_final_PP_functional_mixo_only_fit_scores[, 2]
    )
  ) +
  geom_text_repel(
    data = ant_community_final_PP_functional_mixo_only_fit_scores,
    aes(
      x = ant_community_final_PP_functional_mixo_only_fit_scores[, 1] + 0.1,
      y = ant_community_final_PP_functional_mixo_only_fit_scores[, 2]
    ),
    fontface = "bold",
    label = rownames(ant_community_final_PP_functional_mixo_only_fit_scores),
    nudge_y = -0.03
  )

# All-activity response matrix: the two mixotroph PP columns, phytoplankton PP,
# and the MNAN / HNAN *_SS_perday columns.
ant_community_final_PP_all_activity <- ant_community_final_PP_calc_PCA_filter %>%
  select(Mixo_PP, Mixo_maybe_PP, MNAN_SS_perday, HNAN_SS_perday, Phyto_PP)

# PCA on centred, unit-variance columns.
Primary_Production_all_prcmp <- prcomp(
  ant_community_final_PP_all_activity,
  center = TRUE,
  scale. = TRUE
)

# Site scores, and variable scores multiplied by 2 for plotting.
Primary_Production_all_prcmp_scores <- as.data.frame(
  scores(Primary_Production_all_prcmp)
)
Primary_Production_all_prcmp_species <- as.data.frame(
  scores(Primary_Production_all_prcmp, display = "species")
) * 2
Primary_Production_all_prcmp_scores$depth <- ant_community_final_PP_calc_PCA_filter$depth
Primary_Production_all_prcmp_scores$Group <- ant_community_final_PP_calc_PCA_filter$Group

# Variance explained per component; the axis labels of the saved figure quote
# the PC1 / PC2 proportions printed here.
summary(Primary_Production_all_prcmp)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5
## Standard deviation     1.6573 1.0992 0.7860 0.53144 0.38100
## Proportion of Variance 0.5493 0.2416 0.1235 0.05648 0.02903
## Cumulative Proportion  0.5493 0.7909 0.9145 0.97097 1.00000
# Manuscript version of the all-activity biplot: colour = Group, shape = depth.
# The axis percentages are typed in by hand from the summary() output.
Primary_Production_all_prcmp_plot <-
  ggplot(Primary_Production_all_prcmp_scores, aes(PC1, PC2)) +
  geom_point(aes(colour = Group, shape = depth)) +
  geom_segment(
    data = Primary_Production_all_prcmp_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    colour = "red"
  ) +
  geom_text_repel(
    data = Primary_Production_all_prcmp_species,
    aes(x = PC1 + 0.1, y = PC2 - 0.09),
    label = rownames(Primary_Production_all_prcmp_species),
    colour = "red",
    fontface = "bold"
  ) +
  geom_text_repel(
    aes(x = PC1, y = PC2 + 0.1),
    size = 2,
    label = rownames(Primary_Production_all_prcmp_scores)
  ) +
  labs(shape = "Depth", x = "PC1 (54.93%)", y = "PC2 (24.16%)")

# Quick-look version coloured by Group only (not saved).
ggplot(Primary_Production_all_prcmp_scores, aes(PC1, PC2)) +
  geom_point(aes(colour = Group)) +
  geom_segment(
    data = Primary_Production_all_prcmp_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    colour = "red"
  ) +
  geom_text(
    data = Primary_Production_all_prcmp_species,
    aes(x = PC1 + 0.1, y = PC2 - 0.09),
    label = rownames(Primary_Production_all_prcmp_species),
    colour = "red",
    fontface = "bold"
  ) +
  geom_text(
    aes(x = PC1, y = PC2 + 0.1),
    size = 3,
    label = rownames(Primary_Production_all_prcmp_scores)
  )

# Write the manuscript figure; no width/height is given, so ggsave uses the
# size of the current graphics device.
ggsave(
  "ALL_activity_PCA.png",
  plot = Primary_Production_all_prcmp_plot,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
#Depth
# All-activity biplot coloured by depth.
ggplot(Primary_Production_all_prcmp_scores, aes(PC1, PC2)) +
  geom_point(aes(colour = depth)) +
  geom_segment(
    data = Primary_Production_all_prcmp_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    colour = "red"
  ) +
  geom_text_repel(
    data = Primary_Production_all_prcmp_species,
    aes(x = PC1 + 0.1, y = PC2 - 0.09),
    label = rownames(Primary_Production_all_prcmp_species),
    colour = "red",
    fontface = "bold"
  ) +
  geom_text(
    aes(x = PC1, y = PC2 + 0.1),
    size = 3,
    label = rownames(Primary_Production_all_prcmp_scores)
  )

# Envfit params: fit the full environmental table onto the all-activity PCA
# (999 permutations).
ant_community_final_PP_functional_all_fit <- envfit(
  Primary_Production_all_prcmp ~ .,
  data = ant_community_final_PP_functional_env,
  permutations = 999
)

# Vector scores multiplied by 2.4 for plotting.
ant_community_final_PP_functional_all_fit_scores <- as.data.frame(
  scores(ant_community_final_PP_functional_all_fit, "vectors")
) * 2.4

# Biplot with PCA loadings (red) and all fitted environmental vectors (black).
ggplot(Primary_Production_all_prcmp_scores, aes(PC1, PC2)) +
  geom_point(aes(colour = Group)) +
  geom_segment(
    data = Primary_Production_all_prcmp_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    colour = "red"
  ) +
  geom_text(
    data = Primary_Production_all_prcmp_species,
    aes(x = PC1 + 0.1, y = PC2 - 0.09),
    label = rownames(Primary_Production_all_prcmp_species),
    colour = "red",
    fontface = "bold"
  ) +
  geom_text(
    aes(x = PC1, y = PC2 + 0.1),
    size = 3,
    label = rownames(Primary_Production_all_prcmp_scores)
  ) +
  geom_segment(
    data = ant_community_final_PP_functional_all_fit_scores,
    aes(
      x = 0,
      y = 0,
      xend = ant_community_final_PP_functional_all_fit_scores[, 1],
      yend = ant_community_final_PP_functional_all_fit_scores[, 2]
    )
  ) +
  geom_text_repel(
    data = ant_community_final_PP_functional_all_fit_scores,
    aes(
      x = ant_community_final_PP_functional_all_fit_scores[, 1] + 0.1,
      y = ant_community_final_PP_functional_all_fit_scores[, 2]
    ),
    fontface = "bold",
    label = rownames(ant_community_final_PP_functional_all_fit_scores),
    nudge_y = -0.03
  )
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Initial runs of envfit show a large number of significant correlations between the primary production data and the environmental parameters; only a few parameters don’t show a significant correlation: NH4, depth_m, PAR1, and bottom (depth to floor). This is likely due to the high degree of co-correlation with latitude within the dataset. Primary production, Chl a, and fluorescence follow the same trends with latitude, making it difficult to discern what is actually driving the primary production activity. Try a smaller model using fewer environmental parameters, focusing on light, nutrients, and maybe ice.

Only seeing variation on the PC2 axis (~20-25% of variance explained). May be missing something in the model along the PC1 axis (~60% of variance explained). I’m going to create a PCA of all the possible response variables (activity and abundance) to look at how the sites spread along the PC1 and PC2 axes. Possibly bring in bacterial abundance here too. NOTHING learned from doing this.

****Additional PCAs to try - Looking at PP and bacterivory of mixos together PLUS looking at all activity together (PP phyto, PP mixo, bacterivory hetero, bacterivory mixo) *COMPLETED

Next: need to convert %SS consumed to % carbon consumed per day. This is NOT to say all carbon is used or adsorbed, just eaten in the form of particulate matter and bacteria. Test to see if that makes a difference. Do this with an average size calculation: consider a range from 0.2 to 0.5 µm in diameter and assume an average coccoid shape, to get a general gist of whether it matters. This would allow an estimate of how much C they are getting from feeding vs. PP. VERY CRUDE estimate with a LOT of assumptions. ???May consider values as C per cell and use that as a comparison???

# Activity + abundance response matrix: the five activity columns plus the
# contiguous abundance column range ANAN_mL through HNAN_mL.
ant_community_final_PP_abund_act <- ant_community_final_PP_calc_PCA_filter %>%
  select(
    Mixo_PP, Mixo_maybe_PP, MNAN_SS_perday, HNAN_SS_perday, Phyto_PP,
    ANAN_mL:HNAN_mL
  )

# PCA on centred, unit-variance columns.
ant_community_final_PP_abund_act_prcomp <- prcomp(
  ant_community_final_PP_abund_act,
  center = TRUE,
  scale. = TRUE
)

# Site scores, and variable scores multiplied by 2 for plotting.
Primary_Production_abund_act_prcmp_scores <- as.data.frame(
  scores(ant_community_final_PP_abund_act_prcomp)
)
Primary_Production_abund_act_prcmp_species <- as.data.frame(
  scores(ant_community_final_PP_abund_act_prcomp, display = "species")
) * 2
Primary_Production_abund_act_prcmp_scores$depth <- ant_community_final_PP_calc_PCA_filter$depth
Primary_Production_abund_act_prcmp_scores$Group <- ant_community_final_PP_calc_PCA_filter$Group

# Biplot coloured by Group.
ggplot(Primary_Production_abund_act_prcmp_scores, aes(PC1, PC2)) +
  geom_point(aes(colour = Group)) +
  geom_segment(
    data = Primary_Production_abund_act_prcmp_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    colour = "red"
  ) +
  geom_text(
    data = Primary_Production_abund_act_prcmp_species,
    aes(x = PC1 + 0.1, y = PC2 - 0.09),
    label = rownames(Primary_Production_abund_act_prcmp_species),
    colour = "red",
    fontface = "bold"
  ) +
  geom_text(
    aes(x = PC1, y = PC2 + 0.1),
    size = 3,
    label = rownames(Primary_Production_abund_act_prcmp_scores)
  )

#I first need to calculate the amount of carbon available for uptake by mixotrophs
#I will then assume a diameter of the high and low range 0.2 and 0.5 um and assume everything is spherical for this first test - there are more complicated ways of doing this but I don't want to waste time if this proves to be fruitless. 
#Calculation: convert bact_mL to bact_L, then multiply by the average bacterial volume = total biomass
#cont: biomass * conversion factor of either 2.2 or 3.0 x 10^-13 (g/mL) to get *available* g C per liter per day
#cont: multiply available C by SS removal % to get C removed by MNAN and/or HNAN
#I can then normalize to a per cell basis by dividing by the number of cells for both PP and bacterivory for comparison there as well....we shall see what comes of this.

#From the paper given to me by JD, the average volume based on DAPI measurements was 0.045 +- 0.06
#Going to use this volume for my calculation.

#first *1000 is to convert to liters
#second *1000000 is to convert to ugrams
#third /100 is to convert from a whole number percent to decimal percent

# Add the carbon-based feeding estimates to the filtered sample table.
# Constants: 0.045 is the average bacterial cell volume and 2.2 * 10^-13 the
# volume-to-carbon conversion factor described in the notes above this call.
# In the *_C_removed lines the first * 1000 converts mL to L, the * 1000000
# converts to micrograms, and the / 100 turns the whole-number percent removal
# into a fraction.
# Per-cell columns divide by (cells per mL * 1000); a zero cell count therefore
# produces a non-finite value.
ant_community_final_PP_calc_PCA_filter_bacbiomass <- ant_community_final_PP_calc_PCA_filter %>%
  mutate(
    # Bacterial volume and carbon per mL
    bac_biomass_avg = 0.045 * Bact_mL,
    Carbon_biomass_perML = bac_biomass_avg * (2.2 * 10^-13),
    # Carbon removed by grazing
    MNAN_C_removed = Carbon_biomass_perML * 1000 * MNAN_SS_perday * 1000000 / 100,
    HNAN_C_removed = Carbon_biomass_perML * 1000 * HNAN_SS_perday * 1000000 / 100,
    # Per-cell normalisation
    MNAN_C_removed_percell = MNAN_C_removed / (MNAN_mL * 1000),
    HNAN_C_removed_percell = HNAN_C_removed / (HNAN_mL * 1000),
    Mixo_PP_percell = Mixo_PP / (MNAN_mL * 1000),
    # Carbon removed as a percentage of bprod
    MNAN_percent_bprod_removed = MNAN_C_removed / bprod * 100,
    HNAN_percent_bprod_removed = HNAN_C_removed / bprod * 100,
    # Per-cell primary production
    MNAN_C_PP_percell_pot = Mixo_PP_potential / (MNAN_mL * 1000),
    PNAN_C_PP_percell = Phyto_PP / (PNAN_mL * 1000)
  )

# Mixotroph primary production per station, coloured by depth.
ggplot(ant_community_final_PP_calc_PCA_filter_bacbiomass) +
  geom_point(aes(x = station, y = Mixo_PP, colour = depth))

# Carbon removed per station: MNAN boxes are filled by depth, HNAN boxes are
# outlined by depth, both drawn on the same axes.
ggplot(ant_community_final_PP_calc_PCA_filter_bacbiomass) +
  geom_boxplot(aes(x = station, y = MNAN_C_removed, fill = depth)) +
  geom_boxplot(aes(x = station, y = HNAN_C_removed, colour = depth))

# HNAN carbon removal on its own.
ggplot(ant_community_final_PP_calc_PCA_filter_bacbiomass) +
  geom_boxplot(aes(x = station, y = HNAN_C_removed, fill = depth))

# Carbon removal (both groups) with mixotroph PP points overlaid.
ggplot(ant_community_final_PP_calc_PCA_filter_bacbiomass) +
  geom_boxplot(aes(x = station, y = MNAN_C_removed, fill = depth)) +
  geom_boxplot(aes(x = station, y = HNAN_C_removed, colour = depth)) +
  geom_point(aes(x = station, y = Mixo_PP, colour = depth))

# Per-cell carbon removal for both groups.
ggplot(ant_community_final_PP_calc_PCA_filter_bacbiomass) +
  geom_boxplot(aes(x = station, y = MNAN_C_removed_percell, fill = depth)) +
  geom_boxplot(aes(x = station, y = HNAN_C_removed_percell, colour = depth))
## Warning: Removed 4 rows containing non-finite values (`stat_boxplot()`).

# Per-cell mixotroph primary production by station, outlined by depth.
ggplot(ant_community_final_PP_calc_PCA_filter_bacbiomass) +
  geom_boxplot(
    aes(x = station, y = Mixo_PP_percell, colour = depth)
  )
## Warning: Removed 4 rows containing non-finite values (`stat_boxplot()`).

#Want to look at comparison of abundance ratio and the ratio of reads for molecular data.
# One geom_col layer per activity-percent column; the constant strings inside
# aes() only serve as legend entries for the fill colours.
ggplot(ant_community_final_PP_calc_PCA_filter_bacbiomass) +
  geom_col(
    aes(x = station, y = mixo_act_percent, fill = "Mixo activity")
  ) +
  geom_col(
    aes(x = station, y = phyto_act_percent, fill = "Phyto activity")
  ) +
  geom_col(
    aes(x = station, y = mixo_maybe_act_percent, fill = "Mixo? activity")
  )

#The current data structure doesn't support how R reads and makes stacked bar plots.
#Need to filter and create a new dataset that has both molecular and microscopy datasets...USING MELT

# Read-based activity columns and microscopy abundance columns, one row per
# sample, keyed by station.
functional_reads <- select(
  ant_community_final_PP_calc_PCA_filter_bacbiomass,
  station, mixo_act, mixo_maybe_act, phyto_act, hetero_act,
  ANAN_mL, MNAN_mL, HNAN_mL
)

# Per-station mean of every numeric column.
# The argument must be spelled `na.rm`: the previous `rm.na = TRUE` was
# silently absorbed by mean()'s `...` and ignored, so a single NA within a
# station turned that station's mean into NA.
functional_reads_avg <- dplyr::group_by(functional_reads, station) %>%
  summarise_if(is.numeric, mean, na.rm = TRUE)

#This portion is adding everything and not the average percentages so I'm commenting them out
#functional_reads_melt <- melt(functional_reads, id.vars = "station")


#ggplot(filter(functional_reads_melt, variable == "mixo_act"|variable == "mixo_maybe_act"|variable == "phyto_act"))+
#  geom_col(aes(x = station, y = value, fill = variable))

#ggplot(filter(functional_reads_melt, !c(variable == "mixo_act"|variable == "mixo_maybe_act"|variable == "phyto_act")))+
#  geom_col(aes(x = station, y = value, fill = variable))

#using the grouped dataset
# Long format (station, variable, value) of the per-station means so the
# columns can be stacked in a single geom_col layer.
functional_reads_avg_melt <- melt(
  as.data.frame(functional_reads_avg),
  id.vars = "station"
)

# Stacked bars: the three autotroph/mixotroph read-activity variables.
ggplot(
  filter(
    functional_reads_avg_melt,
    variable %in% c("mixo_act", "mixo_maybe_act", "phyto_act")
  )
) +
  geom_col(aes(x = station, y = value, fill = variable))

# Stacked bars: the same three plus hetero_act.
ggplot(
  filter(
    functional_reads_avg_melt,
    variable %in% c("mixo_act", "mixo_maybe_act", "phyto_act", "hetero_act")
  )
) +
  geom_col(aes(x = station, y = value, fill = variable))

# Stacked bars: everything else, i.e. the *_mL abundance columns.
ggplot(
  filter(
    functional_reads_avg_melt,
    !(variable %in% c("mixo_act", "mixo_maybe_act", "phyto_act", "hetero_act"))
  )
) +
  geom_col(aes(x = station, y = value, fill = variable))

#Now to attempt to validate incase people ask....Need to look at HNAN to plastidic ratios to see if they correlate the same and how that compares to the feeding estimates we measure to assess at how much error there could be using this approach.

#Want to do PCA analyses using the Carbon data instead of the raw ingestions of bacteria. That way we are doing PCAs using the same unit of measure across the measured physiological responses. (besides abundance data)

#Carbon based PCA analyses---------

# Carbon-based response matrix: PP columns, carbon removed by MNAN / HNAN,
# the abundance column range ANAN_mL through HNAN_mL, plus Bact_mL and bprod.
ant_community_final_PP_carbon_based <- ant_community_final_PP_calc_PCA_filter_bacbiomass %>%
  select(
    Mixo_PP, Mixo_maybe_PP, MNAN_C_removed, HNAN_C_removed, Phyto_PP,
    ANAN_mL:HNAN_mL, Bact_mL, bprod
  )

# PCA on centred, unit-variance columns.
ant_community_final_PP_carbon_based_prcomp <- prcomp(
  ant_community_final_PP_carbon_based,
  center = TRUE,
  scale. = TRUE
)

# Site scores, and variable scores multiplied by 2 for plotting.
Primary_Production_carbon_based_prcmp_scores <- as.data.frame(
  scores(ant_community_final_PP_carbon_based_prcomp)
)
Primary_Production_carbon_based_prcmp_species <- as.data.frame(
  scores(ant_community_final_PP_carbon_based_prcomp, display = "species")
) * 2
Primary_Production_carbon_based_prcmp_scores$depth <- ant_community_final_PP_calc_PCA_filter_bacbiomass$depth
Primary_Production_carbon_based_prcmp_scores$Group <- ant_community_final_PP_calc_PCA_filter_bacbiomass$Group

# Biplot; note the points are coloured by depth here, not by Group.
ggplot(Primary_Production_carbon_based_prcmp_scores, aes(PC1, PC2)) +
  geom_point(aes(colour = depth)) +
  geom_segment(
    data = Primary_Production_carbon_based_prcmp_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    colour = "red"
  ) +
  geom_text_repel(
    data = Primary_Production_carbon_based_prcmp_species,
    aes(x = PC1 + 0.1, y = PC2 - 0.09),
    label = rownames(Primary_Production_carbon_based_prcmp_species),
    colour = "red",
    fontface = "bold"
  ) +
  geom_text(
    aes(x = PC1, y = PC2 + 0.1),
    size = 3,
    label = rownames(Primary_Production_carbon_based_prcmp_scores)
  )

#Checking the spread based on PC2 and PC3 axes ~33 percent total variation explained
#Not worth exploring...better to look at just PC1 and PC2 for all analyses...there's always going to be a lot of variation not explained as everything cocorrelates with lat and somewhat with long.
#ggplot(data = Primary_Production_carbon_based_prcmp_scores, aes(x = PC2, y = PC3))+
#  geom_point(aes(color = Group))+
#  geom_segment(data = Primary_Production_carbon_based_prcmp_species, aes(x = 0, y = 0, xend = #PC2, yend = PC3), color = "red")+
#  geom_text_repel(data = Primary_Production_carbon_based_prcmp_species, aes(x = PC2+0.1, y = #PC3-0.09), label = rownames(Primary_Production_carbon_based_prcmp_species), color = "red", #fontface = "bold")+
#  geom_text(aes(x = PC2, y = PC3+0.1), size = 3,label = #rownames(Primary_Production_carbon_based_prcmp_scores))

#Carbon based PCA analyis W/o bact-----------
# Carbon-based response matrix without the bacterial columns
# (no Bact_mL, no bprod).
ant_community_final_PP_carbon_based_WoBact <- ant_community_final_PP_calc_PCA_filter_bacbiomass %>%
  select(
    Mixo_PP, Mixo_maybe_PP, MNAN_C_removed, HNAN_C_removed, Phyto_PP,
    ANAN_mL:HNAN_mL
  )

# PCA on centred, unit-variance columns.
ant_community_final_PP_carbon_based_WoBact_prcomp <- prcomp(
  ant_community_final_PP_carbon_based_WoBact,
  center = TRUE,
  scale. = TRUE
)

# Site scores, and variable scores multiplied by 2 for plotting.
Primary_Production_carbon_based_WoBact_prcmp_scores <- as.data.frame(
  scores(ant_community_final_PP_carbon_based_WoBact_prcomp)
)
Primary_Production_carbon_based_WoBact_prcmp_species <- as.data.frame(
  scores(ant_community_final_PP_carbon_based_WoBact_prcomp, display = "species")
) * 2
Primary_Production_carbon_based_WoBact_prcmp_scores$depth <- ant_community_final_PP_calc_PCA_filter_bacbiomass$depth
Primary_Production_carbon_based_WoBact_prcmp_scores$Group <- ant_community_final_PP_calc_PCA_filter_bacbiomass$Group

# Variance explained per component; the axis labels of the figure built next
# quote the PC1 / PC2 proportions printed here.
summary(ant_community_final_PP_carbon_based_WoBact_prcomp)
## Importance of components:
##                           PC1    PC2    PC3    PC4     PC5     PC6     PC7
## Standard deviation     2.1367 1.3241 1.0837 0.7966 0.64266 0.57648 0.26193
## Proportion of Variance 0.5073 0.1948 0.1305 0.0705 0.04589 0.03693 0.00762
## Cumulative Proportion  0.5073 0.7021 0.8326 0.9031 0.94900 0.98593 0.99355
##                            PC8      PC9
## Standard deviation     0.24073 0.008316
## Proportion of Variance 0.00644 0.000010
## Cumulative Proportion  0.99999 1.000000
# Biplot of the carbon-based PCA without bacteria: colour = Group,
# shape = depth. The axis percentages are typed in by hand from summary().
ALL_response_Wo_bact_Cbased_plot <-
  ggplot(Primary_Production_carbon_based_WoBact_prcmp_scores, aes(PC1, PC2)) +
  geom_point(aes(colour = Group, shape = depth)) +
  geom_segment(
    data = Primary_Production_carbon_based_WoBact_prcmp_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    colour = "red"
  ) +
  geom_text_repel(
    data = Primary_Production_carbon_based_WoBact_prcmp_species,
    aes(x = PC1 + 0.1, y = PC2 - 0.09),
    label = rownames(Primary_Production_carbon_based_WoBact_prcmp_species),
    colour = "red",
    fontface = "bold",
    size = 3
  ) +
  geom_text_repel(
    aes(x = PC1, y = PC2 + 0.1),
    size = 2.2,
    label = rownames(Primary_Production_carbon_based_WoBact_prcmp_scores)
  ) +
  labs(shape = "Depth", x = "PC1 (50.73%)", y = "PC2 (19.48%)")

# Print the figure.
ALL_response_Wo_bact_Cbased_plot

#ggsave("ALL_response_Wo_bact_Cbased.png", plot = ALL_response_Wo_bact_Cbased_plot, path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/")
#I need to clean up the spread of the data by redoing this analysis using averaged dataset across stations again.

#validation attempt using heterotrophic to plastidic ratios----

# Validation table: split the total carbon removed (MNAN + HNAN) between
# heterotrophs and mixotrophs according to their share of the read-based
# feeding activity, then express each share relative to bprod.
#
# The two *_percent_bprod columns are multiplied by 100 so that they are on the
# same percent scale as MNAN_percent_bprod_removed / HNAN_percent_bprod_removed
# (computed as C removed / bprod * 100). Previously they were plain fractions,
# which put the two "percent bprod removed" comparison plots on scales that
# differed by a factor of 100.
ant_community_final_PP_calc_PCA_filter_bacbiomass_validation <- mutate(ant_community_final_PP_calc_PCA_filter_bacbiomass, 
# Heterotroph share (0-1) of the feeding-capable read activity.
hetero_act_percent_forfeed = hetero_act/(hetero_act+mixo_act+mixo_maybe_act),
#hetero_act_feeding_C = Carbon_biomass_perML*hetero_act_percent_forfeed*1000000*100,
#mixo_act_feeding = Carbon_biomass_perML*(1-hetero_act_feeding_C)*1000000*100,
#This takes as a percentage of total bacterial biomass and NOT the bacterial biomass removed. GROSS overestimation.
# Consumed carbon apportioned by read share.
hetero_act_feeding_C_redo = (MNAN_C_removed+HNAN_C_removed)*hetero_act_percent_forfeed,
mixo_act_feeding_redo = (MNAN_C_removed+HNAN_C_removed)*(1-hetero_act_percent_forfeed),
# Apportioned carbon as a percentage of bprod.
hetero_act_feeding_C_percent_bprod = hetero_act_feeding_C_redo/bprod*100,
mixo_act_feeding_C_percent_bprod = mixo_act_feeding_redo/bprod*100
)


#ggplot(ant_community_final_PP_calc_PCA_filter_bacbiomass_validation)+
#  geom_boxplot(aes(x = station, y = mixo_act_feeding, fill = depth))+
#  geom_boxplot(aes(x = station, y = MNAN_C_removed, fill = depth))
#
#ggplot(ant_community_final_PP_calc_PCA_filter_bacbiomass_validation)+
#  geom_boxplot(aes(x = station, y = hetero_act_feeding_C, fill = depth))+
#  geom_boxplot(aes(x = station, y = HNAN_C_removed, fill = depth))
#
#ggplot(ant_community_final_PP_calc_PCA_filter_bacbiomass_validation)+
#  geom_boxplot(aes(x = station, y = mixo_act_feeding, fill = depth))+
#  geom_boxplot(aes(x = station, y = hetero_act_feeding_C, fill = depth))

# Carbon removed per station, estimated from the feeding measurements:
# MNAN boxes filled by depth, HNAN boxes outlined by depth.
ggplot(ant_community_final_PP_calc_PCA_filter_bacbiomass_validation) +
  geom_boxplot(aes(x = station, y = MNAN_C_removed, fill = depth)) +
  geom_boxplot(aes(x = station, y = HNAN_C_removed, colour = depth)) +
  labs(title = "Estimation of C removed by MNANs and HNANs")

#In the first attempt at this validation the percent activity was applied to the total available biomass, which is why the C values were doubled.

#REDO but with only CONSUMED C
# Same layout, using the carbon apportioned by read percentage.
ggplot(ant_community_final_PP_calc_PCA_filter_bacbiomass_validation) +
  geom_boxplot(aes(x = station, y = mixo_act_feeding_redo, fill = depth)) +
  geom_boxplot(aes(x = station, y = hetero_act_feeding_C_redo, colour = depth)) +
  labs(title = "Estimation of C removed by MNANs and HNANs via read %")

#Testing assumptions by comparing percent bprod removed using each method of bacterivory estimation - can have greater than 100% removed for 2 reasons
#1) protists are incubated with bacteria so not all of the system's bprod has been measured
#2) Protists can graze on both production and biomass (bact_mL) for a total over 100%

# NOTE(review): the two figures below plot percent-of-bprod columns but reuse
# the "C removed" titles of the two figures above - confirm whether the titles
# should name bprod instead.
ggplot(ant_community_final_PP_calc_PCA_filter_bacbiomass_validation) +
  geom_boxplot(aes(x = station, y = MNAN_percent_bprod_removed, fill = depth)) +
  geom_boxplot(aes(x = station, y = HNAN_percent_bprod_removed, colour = depth)) +
  labs(title = "Estimation of C removed by MNANs and HNANs")

#REDO but with only CONSUMED C
ggplot(ant_community_final_PP_calc_PCA_filter_bacbiomass_validation) +
  geom_boxplot(aes(x = station, y = mixo_act_feeding_C_percent_bprod, fill = depth)) +
  geom_boxplot(aes(x = station, y = hetero_act_feeding_C_percent_bprod, colour = depth)) +
  labs(title = "Estimation of C removed by MNANs and HNANs via read %")

#First trial with entire dataset threw an error for cor.test. Think this because there are non-numeric columns in the dataset. I should also removed all of the redundant vars.
#Initial attempt with this doesn't use the df WITH the %read feeding data, only PP.
#No %read bacterivory data: *ant_community_final_PP_calc_PCA_filter_bacbiomass*
#With %read bacterivory data: *ant_community_final_PP_calc_PCA_filter_bacbiomass_validation*
#substituting the dfs should only change the additional 2 measures and shouldn't impact correlation matrix pipeline....
#This fixes colnames
# Rename every column whose current name appears in colname_fix. The nine new
# names are assigned positionally, in the order the matching columns occur in
# the data frame.
colnames(ant_community_final_PP_calc_PCA_filter_bacbiomass_validation)[
  colnames(ant_community_final_PP_calc_PCA_filter_bacbiomass_validation) %in% colname_fix
] <- c(
  "ZML", "ZE", "Water_Temp", "conductivity", "oxygen",
  "PAR", "Time", "salinity", "oxygen_saturation"
)

#Again a MAJOR difference between the two is the correlation analysis is done using the ungrouped data here, this may allow for some slight differences and likely some additional significance signalling here where it may get missed in the grouped dataset. 

# Numeric columns only: cor.test() cannot handle the character columns.
ant_community_final_PP_calc_PCA_filter_bacbiomass_num_select <-
  ant_community_final_PP_calc_PCA_filter_bacbiomass_validation %>%
  select_if(is.numeric)

# Row and column labels of the (square) correlation matrices are both the
# numeric column names.
cor_rownames_C <- colnames(ant_community_final_PP_calc_PCA_filter_bacbiomass_num_select)
cor_colnames_C <- colnames(ant_community_final_PP_calc_PCA_filter_bacbiomass_num_select)
#cor_rownames_C <- cor_rownames_C[-(1:3)]
#cor_colnames_C <- cor_colnames_C[-(1:3)]

# Empty (all-NA) matrices for the correlation estimates and their p-values.
cor_matrix_C <- matrix(
  data = NA,
  nrow = length(cor_colnames_C),
  ncol = length(cor_colnames_C),
  dimnames = list(cor_rownames_C, cor_colnames_C)
)
cor_p_matrix_C <- matrix(
  data = NA,
  nrow = length(cor_colnames_C),
  ncol = length(cor_colnames_C),
  dimnames = list(cor_rownames_C, cor_colnames_C)
)

#used to debug my code
#for(i in colnames(cor_matrix)){
#  temp_list <- ant_community_final_calc_grp_remStations[,i]
#  print(temp_list)
#}
#str(temp_list)
#str(unlist(ant_community_final_calc_grp_remStations[,"Lat_deg"]))

#str(ant_community_final_calc_grp_remStations[,-(1:3)])
# Fill both matrices with the pairwise cor.test() results of every numeric
# column against every other numeric column. Cell [j, i] holds the result for
# column i vs. column j (estimate in cor_matrix_C, p-value in cor_p_matrix_C).
temp_i_vector_C <- vector()
temp_j_vector_C <- vector()
for (i in colnames(cor_matrix_C)) {
  # The column-i vector does not depend on j, so it is extracted once per
  # outer iteration instead of once per cell.
  temp_i_vector_C <- as.numeric(unlist(ant_community_final_PP_calc_PCA_filter_bacbiomass_num_select[, i]))
  for (j in rownames(cor_matrix_C)) {
    temp_j_vector_C <- as.numeric(unlist(ant_community_final_PP_calc_PCA_filter_bacbiomass_num_select[, j]))
    temp_cor <- cor.test(temp_i_vector_C, temp_j_vector_C)
    cor_matrix_C[j, i] <- temp_cor[["estimate"]]
    cor_p_matrix_C[j, i] <- temp_cor[["p.value"]]
  }
}
# The warnings below come from the rendered run: cor() reports a zero standard
# deviation for at least one pair of columns.
## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero
# Long-format copies of both matrices.
cor_matrix_melt_C <- melt(cor_matrix_C)
cor_p_matrix_melt_C <- melt(cor_p_matrix_C)

#Mixo_PP, Mixo_maybe_PP, MNAN_C_removed, HNAN_C_removed, Phyto_PP, ANAN_mL:HNAN_mL,Bact_mL, bprod

#ant_community_final_calc_env_grp_remST_remREDUND_colnames <- colnames(ant_community_final_calc_env_grp_remST_remREDUND[,-c(2,7)])

#ant_community_final_calc_env_grp_remST_remREDUND_colnames[24] <- "ZE"
#janky fix but I'm tired and this will work
#ant_community_final_calc_env_grp_remST_remREDUND_colnames[25] <- "ZML"

# Rows: the variables listed in
# ant_community_final_calc_env_grp_remST_remREDUND_colnames.
# Columns: the response measures. The result is melted to long format.
cor_martrix_final_filter_C <- melt(
  cor_matrix_C[
    colnames(cor_matrix_C) %in% ant_community_final_calc_env_grp_remST_remREDUND_colnames,
    c(
      "bprod", "Bact_mL", "MNAN_C_removed", "HNAN_C_removed",
      "ANAN_mL", "PNAN_mL", "MNAN_mL", "HNAN_mL",
      "Mixo_PP", "Mixo_maybe_PP", "Phyto_PP"
    )
  ]
)

# Same selection applied to the p-value matrix.
cor_p_martrix_final_filter_C <- melt(
  cor_p_matrix_C[
    colnames(cor_matrix_C) %in% ant_community_final_calc_env_grp_remST_remREDUND_colnames,
    c(
      "bprod", "Bact_mL", "MNAN_C_removed", "HNAN_C_removed",
      "ANAN_mL", "PNAN_mL", "MNAN_mL", "HNAN_mL",
      "Mixo_PP", "Mixo_maybe_PP", "Phyto_PP"
    )
  ]
)

# Attach the p-values to the melted correlation estimates.
cor_martrix_final_filter_C[["pvalue"]] <- cor_p_martrix_final_filter_C[["value"]]

# Heat map of the filtered correlation matrix: tile fill is the correlation
# estimate, tile outline marks p < 0.05 (black) vs. not (white), and the
# rounded estimate is printed inside each tile.
All_responses_correlation_plot <- ggplot(cor_martrix_final_filter_C, aes(X1, X2)) +
  geom_tile(
    aes(fill = value, color = pvalue < 0.05),
    lwd = 0.5, height = 0.75, width = 0.95
  ) +
  # TRUE/FALSE spelled out: T and F are ordinary variables and can be reassigned.
  scale_color_manual(
    name = "pvalue<0.05",
    values = setNames(c("black", "White"), c(TRUE, FALSE))
  ) +
  scale_fill_gradient2() +
  scale_y_discrete(guide = guide_axis(angle = 90)) +
  # Label is mapped from the plot data so it always stays aligned with the tiles.
  geom_text(aes(label = round(value, 3)), size = 1.5) +
  theme(
    axis.text.y = element_text(size = 5),
    axis.text.x = element_text(size = 5),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  coord_flip() +
  labs(fill = "Correlation Value")

All_responses_correlation_plot

ggsave("All_responses_correlationmatrix.png", plot = All_responses_correlation_plot, path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/")
## Saving 7 x 5 in image
#Testing throwing in depth_m here
# depth_m is appended with union() rather than written to a hard-coded
# position, so no existing entry can be overwritten and no NA gap can be
# created if the length of the name vector changes.
ant_community_final_calc_env_grp_remST_remREDUND_colnames_test <- union(
  ant_community_final_calc_env_grp_remST_remREDUND_colnames,
  "depth_m"
)

# Rows: the variable list above (now including depth_m).
# Columns: the response measures used for this test version.
cor_martrix_final_filter_C_test <- melt(
  cor_matrix_C[
    which(colnames(cor_matrix_C) %in% ant_community_final_calc_env_grp_remST_remREDUND_colnames_test),
    c(
      "bprod", "Bact_mL", "MNAN_C_removed", "HNAN_C_removed",
      "PNAN_mL", "MNAN_mL", "HNAN_mL", "Mixo_PP_potential", "Phyto_PP",
      "hetero_act_feeding_C_redo", "mixo_act_feeding_redo"
    )
  ]
)

cor_p_martrix_final_filter_C_test <- melt(
  cor_p_matrix_C[
    which(colnames(cor_matrix_C) %in% ant_community_final_calc_env_grp_remST_remREDUND_colnames_test),
    c(
      "bprod", "Bact_mL", "MNAN_C_removed", "HNAN_C_removed",
      "PNAN_mL", "MNAN_mL", "HNAN_mL", "Mixo_PP_potential", "Phyto_PP",
      "hetero_act_feeding_C_redo", "mixo_act_feeding_redo"
    )
  ]
)

#adding Pvalues to the melted data
cor_martrix_final_filter_C_test$pvalue <- cor_p_martrix_final_filter_C_test$value

# Same heat map layout as All_responses_correlation_plot, drawn from the test
# selection.
All_responses_correlation_plot_test <- ggplot(cor_martrix_final_filter_C_test, aes(X1, X2)) +
  geom_tile(
    aes(fill = value, color = pvalue < 0.05),
    lwd = 0.5, height = 0.75, width = 0.95
  ) +
  # TRUE/FALSE spelled out: T and F are ordinary variables and can be reassigned.
  scale_color_manual(
    name = "pvalue<0.05",
    values = setNames(c("black", "White"), c(TRUE, FALSE))
  ) +
  scale_fill_gradient2() +
  scale_y_discrete(guide = guide_axis(angle = 90)) +
  geom_text(aes(label = round(value, 3)), size = 1.5) +
  theme(
    axis.text.y = element_text(size = 5),
    axis.text.x = element_text(size = 5),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  coord_flip() +
  labs(fill = "Correlation Value")

All_responses_correlation_plot_test

# Save the TEST plot. Previously the non-test plot object was written into
# this file.
ggsave("All_responses_correlation_plot_test.pdf", plot = All_responses_correlation_plot_test, path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/")
## Saving 7 x 5 in image
# The model-selection code has already been sourced, but so far only one envfit
# exists for all of the PCA plots that were made. The remaining envfits are
# created here; return to the model selection once the envfits are complete.

# Creating envfits here:
# Fits already done
# ant_community_final_PP_functional_fit - initial fit with just PP data (mixo PP, mixo? PP, ANAN PP)
#   Sig Vars: PO4, NO2_NO3, latitude, ZML_TS_m..0.023Kg.m3., ice, Chla, timeJ
# ant_community_final_PP_functional_nutonly_fit - same as above but with a subset of env vars
# ant_community_final_PP_functional_mixo_only_fit - mixo only PP and feeding fit with all env vars - non carbon feeding (Mixo PP, mixo maybe PP, and MNAN SS perday)
#   Sig Vars: NO2_NO3, timeJ, latitude, bprod, ZML_TS_m..0.023Kg.m3., Chla, ice
# ant_community_final_PP_functional_all_fit - All functional measures with all env vars but not C based feeding (Mixo PP, mixo?PP, PNAN PP, HNAN SS perday and MNAN SS perday)
#   Sig Vars: NO2_NO3, timeJ, latitude, bprod, ZML_TS_m..0.023Kg.m3., Chla, ice, NH4

# Fits to create here:
# Mixo all activity and abundance measures MNAN/HNAN/PNAN per ml and activity
# ant_community_final_PP_abund_act_prcomp
ant_community_final_PP_abund_act_fit <- envfit(
  ant_community_final_PP_abund_act_prcomp ~ .,
  ant_community_final_PP_functional_env,
  permutations = 999
)
#   Sig Vars: NO2_NO3, bprod, latitude, timeJ, PAR1, ZML_TS_m..0.023Kg.m3., ice, NH4

# **NOT WORRYING ABOUT THIS ONE**
# All activity and abundance with Bact in Carbon based measures
# ant_community_final_PP_carbon_based_prcomp
ant_community_final_PP_carbon_based_fit <- envfit(
  ant_community_final_PP_carbon_based_prcomp ~ .,
  ant_community_final_PP_functional_env,
  permutations = 999
)
# **NOT WORRYING ABOUT THIS ONE**

# All activity and abundance measures w/o bact in carbon based measures
# ant_community_final_PP_carbon_based_WoBact_prcomp
ant_community_final_PP_carbon_based_WoBact_fit <- envfit(
  ant_community_final_PP_carbon_based_WoBact_prcomp ~ .,
  ant_community_final_PP_functional_env,
  permutations = 999
)
#   Sig vars: NO2_NO3, bprod, timeJ, latitude, ZML_TS_m..0.023Kg.m3., ice, NH4

#Notes from previous model code chunk earlier in script----
#Second create objects of the distance matrices used in previous adonis calls for AICc table function
#bacterivory_dist_matrix <- vegdist(scale(ant_community_final_calc_bacterivory_grp_fixed_remST), "euclidean")

#functional_dist_matrix <- vegdist(scale(ant_community_final_functional_grp_fixed_remST), "euclidean")
#**I dont need to make these matrix as I can call it in the AIC table function**
#Third create vector objects with significant vars from each env fit
#bacterivory_sig_vars <- c("latitude","NH4","ZML","Chla")

#functional_sig_vars <- c("latitude","ice","fluorescence","NH4")

#First test for the output
#AICc.table.Nvar(bacterivory_sig_vars, matrix.char = bacterivory_dist_matrix, perm = 999, n.var = 3, method = "euclidean", df = ant_community_final_calc_env_grp_remST)

#Initial run of the previous sourced functions didn't work with the current structuring of the data and the evolution of the r packages.

#Script has been edited and fixed by me and JD. New sourcing and function here:
#source("/Users/christophercarnivale/Desktop/Dissertation_data/R_Scripts_adonis_modelselection/AICc_table_generation_edited.R")

#Bacterivory AIC model selection
#AICc.table.Nvar(bacterivory_sig_vars, matrix.char = bacterivory_dist_matrix, perm = 999, n.var = 3, method = "euclidean", df = ant_community_final_calc_env_grp_remST)

#AICc.table.all(bacterivory_sig_vars, matrix.char = bacterivory_dist_matrix, perm = 999, method = "euclidean", df = ant_community_final_calc_env_grp_remST, comb.incl = c(1,2,3))

#Functional AIC Model selection
#AICc.table.Nvar(functional_sig_vars, matrix.char = functional_dist_matrix, perm = 999, n.var = 3, method = "euclidean", df = ant_community_final_calc_env_grp_remST)

#AICc.table.all(functional_sig_vars, matrix.char = functional_dist_matrix, perm = 999, method = "euclidean", df = ant_community_final_calc_env_grp_remST, comb.incl = c(1,2,3))

#I will quickly add AIC model selections for PP and hetero only functional analysis

#Functional PP model selection
#Sig Vars: latitude, NO2_NO3, ZML, ice, depth_m, Chla, oxygen, fluorescence, beamTrans, Time, NH4
#AICc.table.Nvar(c("latitude", "NO2_NO3", "ZML", "ice"), matrix.char = vegdist(scale(ant_community_final_functional_grp_fixed_remST_PP), "euclidean"), perm = 999, n.var = 3, method = "euclidean", df = ant_community_final_calc_env_grp_remST)

#AICc.table.all(c("latitude", "NO2_NO3", "ZML", "ice"), matrix.char = vegdist(scale(ant_community_final_functional_grp_fixed_remST_PP), "euclidean"), perm = 999, method = "euclidean", df = ant_community_final_calc_env_grp_remST, comb.incl = c(1,2,3))

#Functional hetero Model selection
#Sig Vars: PAR, NH4, fluorescence, ZML, depth_m, ice
#AICc.table.Nvar(c("PAR", "NH4", "fluorescence", "ZML"), matrix.char = vegdist(scale(ant_community_final_functional_grp_fixed_remST_hetero), "euclidean"), perm = 999, n.var = 3, method = "euclidean", df = ant_community_final_calc_env_grp_remST)

#AICc.table.all(c("PAR", "NH4", "fluorescence", "ZML"), matrix.char = vegdist(scale(ant_community_final_functional_grp_fixed_remST_hetero), "euclidean"), perm = 999, method = "euclidean", df = ant_community_final_calc_env_grp_remST, comb.incl = c(1,2,3))


#Model selection of all PP included PCA models Start----
# The envfit objects themselves are NOT inputs to the model selection. Each call
# receives (1) the names of the variables found significant by the matching
# envfit, (2) a Euclidean distance matrix of the scaled response data and
# (3) the env data frame.
# AICc.table.Nvar and AICc.table.all are not defined in this part of the file
# (presumably they come from the sourced AICc_table_generation_edited.R noted
# above - TODO confirm).
# Each response set is run twice: once with n.var = 3 (AICc.table.Nvar) and once
# with comb.incl = c(1,2,3) (AICc.table.all).
# The "## [1] 1/2/3" lines are console output printed during the AICc.table.all
# runs.
## All PP functional------
#df for first model selection: ant_community_final_PP_functional
ant_community_final_PP_functional_MoSelNvar <- AICc.table.Nvar(c("PO4", "NO2_NO3", "latitude", "ZML", "ice", "Chla", "Time"), matrix.char = vegdist(scale(ant_community_final_PP_functional), "euclidean"), perm = 999, n.var = 3, method = "euclidean", df = ant_community_final_PP_functional_env)

ant_community_final_PP_functional_MoSelALL <- AICc.table.all(c("PO4", "NO2_NO3", "latitude", "ZML", "ice", "Chla", "Time"), matrix.char = vegdist(scale(ant_community_final_PP_functional), "euclidean"), perm = 999, method = "euclidean", df = ant_community_final_PP_functional_env, comb.incl = c(1,2,3))
## [1] 1
## [1] 2
## [1] 3
## Mixo Only PP functional (actually activity)-------
ant_community_final_PP_mixo_only_activity_MoSelNvar <- AICc.table.Nvar(c("NO2_NO3", "Time", "latitude", "bprod", "ZML", "Chla", "ice"), matrix.char = vegdist(scale(ant_community_final_PP_mixo_only_activity), "euclidean"), perm = 999, n.var = 3, method = "euclidean", df = ant_community_final_PP_functional_env)

ant_community_final_PP_mixo_only_activity_MoSelALL <- AICc.table.all(c("NO2_NO3", "Time", "latitude", "bprod", "ZML", "Chla", "ice"), matrix.char = vegdist(scale(ant_community_final_PP_mixo_only_activity), "euclidean"), perm = 999, method = "euclidean", df = ant_community_final_PP_functional_env, comb.incl = c(1,2,3))
## [1] 1
## [1] 2
## [1] 3
## All functional measures w PNAN and HNAN NO CARBON FEEDING ------
# Note the different naming scheme of the two result objects in this section
# (Primary_Production_all_prcmp_*).
Primary_Production_all_prcmp_MoSelNvar <-AICc.table.Nvar(c("NO2_NO3", "Time", "latitude", "bprod", "ZML", "Chla", "ice", "NH4"), matrix.char = vegdist(scale(ant_community_final_PP_all_activity), "euclidean"), perm = 999, n.var = 3, method = "euclidean", df = ant_community_final_PP_functional_env)

Primary_Production_all_prcmp_MoSelALL <-AICc.table.all(c("NO2_NO3", "Time", "latitude", "bprod", "ZML", "Chla", "ice", "NH4"), matrix.char = vegdist(scale(ant_community_final_PP_all_activity), "euclidean"), perm = 999, method = "euclidean", df = ant_community_final_PP_functional_env, comb.incl = c(1,2,3))
## [1] 1
## [1] 2
## [1] 3
## Mixo all activity and abundance measures NON Carbon based ----
ant_community_final_PP_abund_act_prcomp_MoSelNvar <- AICc.table.Nvar(c("NO2_NO3", "bprod", "latitude", "Time", "PAR", "ZML", "ice", "NH4"), matrix.char = vegdist(scale(ant_community_final_PP_abund_act), "euclidean"), perm = 999, n.var = 3, method = "euclidean", df = ant_community_final_PP_functional_env)

ant_community_final_PP_abund_act_prcomp_MoSelALL <- AICc.table.all(c("NO2_NO3", "bprod", "latitude", "Time", "PAR", "ZML", "ice", "NH4"), matrix.char = vegdist(scale(ant_community_final_PP_abund_act), "euclidean"), perm = 999, method = "euclidean", df = ant_community_final_PP_functional_env, comb.incl = c(1,2,3))
## [1] 1
## [1] 2
## [1] 3
##All activity and abundance measures w/o bact in carbon based measures -----
ant_community_final_PP_carbon_based_WoBact_prcomp_MoSelNvar <- AICc.table.Nvar(c("NO2_NO3", "Time", "latitude", "bprod", "ZML", "ice", "NH4"), matrix.char = vegdist(scale(ant_community_final_PP_carbon_based_WoBact), "euclidean"), perm = 999, n.var = 3, method = "euclidean", df = ant_community_final_PP_functional_env)

ant_community_final_PP_carbon_based_WoBact_prcomp_MoSelALL <- AICc.table.all(c("NO2_NO3", "Time", "latitude", "bprod", "ZML", "ice", "NH4"), matrix.char = vegdist(scale(ant_community_final_PP_carbon_based_WoBact), "euclidean"), perm = 999, method = "euclidean", df = ant_community_final_PP_functional_env, comb.incl = c(1,2,3))
## [1] 1
## [1] 2
## [1] 3
#Model selection of all PP included PCA models end----

##Making tables for model selection dfs ----
#knitr::kable(ant_community_final_PP_carbon_based_WoBact_prcomp_MoSelALL)

#ggtexttable(ant_community_final_PP_carbon_based_WoBact_prcomp_MoSelALL[1:10,])

#ggtexttable(ant_community_final_PP_carbon_based_WoBact)

#flextable(ant_community_final_PP_carbon_based_WoBact_prcomp_MoSelALL[1:10,])

# To save space, all envfits and model selections are reordered by p-value /
# AIC value and only a subset of vars is shown. This is done by the table
# functions in the script sourced on the next line.

source("/Users/christophercarnivale/Desktop/Dissertation_data/R_Scripts_adonis_modelselection/Adonis_envit_flextable.R")

##Carbon based adonis/envfit tables-----
# All activity and abundance measures w/o bact in carbon based measures
ant_community_final_PP_carbon_based_WoBact_adonis_table <- add_header_lines(
  Adonis_model_selection_table(
    ant_community_final_PP_carbon_based_WoBact_prcomp_MoSelALL,
    "AICc.values"
  ),
  values = "All activity and abundance Carbon Based"
)

ant_community_final_PP_carbon_based_WoBact_envfit_table <- envfit_vars_selection_table(
  ant_community_final_PP_carbon_based_WoBact_fit,
  titles = "All activity and abundance Carbon Based",
  sigcodes = "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1; Permutations 999 Free"
)

# All PP only activity PNAN and MNAN in Carbon based metrics
ant_community_final_PP_functional_adonis_table <- add_header_lines(
  Adonis_model_selection_table(
    ant_community_final_PP_functional_MoSelALL,
    "AICc.values"
  ),
  values = "All PP activity Carbon Based"
)

ant_community_final_PP_functional_envfit_table <- envfit_vars_selection_table(
  ant_community_final_PP_functional_fit,
  titles = "All PP activity Carbon Based",
  sigcodes = "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1; Permutations 999 Free"
)
#NEED to add Carbon based community right now only have SS per day done.

##Non-Carbon adonis based adonis/envfit tables ----
# Most important ones first
#Mixo all activity and abundance measures NON Carbon based ----
# Mixo all activity and abundance measures MNAN/HNAN/PNAN per ml and activity
ant_community_final_PP_abund_act_prcomp_adonis_table <- add_header_lines(
  Adonis_model_selection_table(
    ant_community_final_PP_abund_act_prcomp_MoSelALL,
    "AICc.values"
  ),
  values = "All activity and abundance non Carbon Based"
)

#Bits of debugging code for envfit table
#View(envfit_vars_selection_table(ant_community_final_PP_abund_act_fit))
#add_header_row(x = envfit_vars_selection_table(ant_community_final_PP_abund_act_fit), values = "All Activity non Carbon Based", colwidths = 6)

ant_community_final_PP_abund_act_prcomp_envfit_table <- envfit_vars_selection_table(
  ant_community_final_PP_abund_act_fit,
  titles = "All activity and abundance non Carbon Based",
  sigcodes = "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1; Permutations 999 Free"
)

#All functional (activity) measures with all env vars but not C based feeding-----
#ant_community_final_PP_functional_all_fit (Mixo PP, mixo?PP, PNAN PP, HNAN SS perday and MNAN SS perday)
# These tables are built from the "all activity, no carbon feeding" objects:
# the model selection stored in Primary_Production_all_prcmp_MoSelALL and the
# envfit named in the comment above. Previously both calls reused the PP-only
# objects (ant_community_final_PP_functional_MoSelALL / _functional_fit), which
# produced a copy of the "All PP activity Carbon Based" tables under this title.
# NOTE(review): ant_community_final_PP_functional_all_fit is created outside
# this section of the script - confirm it is available at this point.
ant_community_final_PP_functional_all_adonis_table <- add_header_lines(
  Adonis_model_selection_table(
    Primary_Production_all_prcmp_MoSelALL,
    "AICc.values"
  ),
  values = "All activity non-Carbon Based"
)

ant_community_final_PP_functional_all_envfit_table <- envfit_vars_selection_table(
  ant_community_final_PP_functional_all_fit,
  titles = "All activity non-Carbon Based",
  sigcodes = "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1; Permutations 999 Free"
)

#Mixo only functional measures (actually activity)--------
# ant_community_final_PP_functional_mixo_only_fit - mixo only PP and feeding fit
# with all env vars - non carbon feeding (Mixo PP, mixo maybe PP, and MNAN SS perday)

ant_community_final_PP_activity_mixo_only_adonis_table <- add_header_lines(
  Adonis_model_selection_table(
    ant_community_final_PP_mixo_only_activity_MoSelALL,
    "AICc.values"
  ),
  values = "MNAN Activity PP and non C based feeding"
)

ant_community_final_PP_activity_mixo_only_envfit_table <- envfit_vars_selection_table(
  ant_community_final_PP_functional_mixo_only_fit,
  titles = "MNAN Activity PP and non C based feeding",
  sigcodes = "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1; Permutations 999 Free"
)

#Tables of Original of adonis and envfit models from feeding only dataset start-----
#OG Functional (abundance) Model Tables-----
functional_prcomp_grp_remST_remREDUND_adonis_table <- add_header_lines(
  Adonis_model_selection_table(
    ant_community_final_functional_grp_fixed_remST_MoSelALL,
    "AICc.values"
  ),
  values = "Functional Groups by abundance"
)

functional_prcomp_grp_remST_remREDUND_envfit_table <- envfit_vars_selection_table(
  functional_prcomp_grp_remST_remREDUND_fit,
  titles = "Functional Groups by abundance",
  sigcodes = "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1; Permutations 999 Free"
)

#OG Bacterivory Model Tables-----
bacterivory_prcomp_grp_remST_remREDUND_adonis_table <- add_header_lines(
  Adonis_model_selection_table(
    ant_community_final_calc_bacterivory_grp_fixed_remST_MoSelALL,
    "AICc.values"
  ),
  values = "HNAN/MNAN SS perday only"
)

bacterivory_prcomp_grp_remST_remREDUND_envfit_table <- envfit_vars_selection_table(
  bacterivory_prcomp_grp_remST_remREDUND_fit,
  titles = "HNAN/MNAN SS perday only",
  sigcodes = "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1; Permutations 999 Free"
)

#PP Only functional (abundance) Model Tables -------
functional_prcomp_grp_remST_PP_remREDUND_adonis_table <- add_header_lines(
  Adonis_model_selection_table(
    ant_community_final_functional_grp_fixed_remST_PP_MoSelALL,
    "AICc.values"
  ),
  values = "ANAN and MNAN abundance only"
)

functional_prcomp_grp_remST_PP_remREDUND_envfit_table <- envfit_vars_selection_table(
  functional_prcomp_grp_remST_PP_remREDUND_fit,
  titles = "ANAN and MNAN abundance only",
  sigcodes = "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1; Permutations 999 Free"
)

#Hetero Only functional (abundance) Model Tables ----------
bacterivory_prcomp_grp_remST_hetero_remREDUND_adonis_table <- add_header_lines(
  Adonis_model_selection_table(
    ant_community_final_functional_grp_fixed_remST_hetero_MoSelALL,
    "AICc.values"
  ),
  values = "HNAN and MNAN abundance only"
)

bacterivory_prcomp_grp_remST_hetero_remREDUND_envfit_table <- envfit_vars_selection_table(
  bacterivory_prcomp_grp_remST_hetero_remREDUND_fit,
  titles = "HNAN and MNAN abundance only",
  sigcodes = "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1; Permutations 999 Free"
)
#Tables of Original of adonis and envfit models from feeding only dataset end-----

# Question: can header lines be added after the adonis table has been made?
#add_header_lines(bacterivory_prcomp_grp_remST_hetero_remREDUND_adonis_table, values = "HNAN and MNAN abundance only")
# Answer: yes, they can.

#Saving all of the tables as PNG files ------
# Every table is written to the Manuscript_figures folder. The "## [1] ..."
# lines are the paths echoed by save_as_image() in the rendered run.
#Hetero Only functional (abundance) Model Tables
save_as_image(
  bacterivory_prcomp_grp_remST_hetero_remREDUND_envfit_table,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Envfit_MNAN_HNAN_abundance_only.png"
)
## [1] "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Envfit_MNAN_HNAN_abundance_only.png"
save_as_image(
  bacterivory_prcomp_grp_remST_hetero_remREDUND_adonis_table,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Adonis_MNAN_HNAN_abundance_only.png"
)
## [1] "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Adonis_MNAN_HNAN_abundance_only.png"
#PP Only functional (abundance)
save_as_image(
  functional_prcomp_grp_remST_PP_remREDUND_envfit_table,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Envfit_PNAN_MNAN_abundance_only.png"
)
## [1] "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Envfit_PNAN_MNAN_abundance_only.png"
save_as_image(
  functional_prcomp_grp_remST_PP_remREDUND_adonis_table,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Adonis_PNAN_MNAN_abundance_only.png"
)
## [1] "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Adonis_PNAN_MNAN_abundance_only.png"
#OG Bacterivory Model Tables
save_as_image(
  bacterivory_prcomp_grp_remST_remREDUND_envfit_table,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Envfit_MNAN_HNAN_SS_perday_only.png"
)
## [1] "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Envfit_MNAN_HNAN_SS_perday_only.png"
save_as_image(
  bacterivory_prcomp_grp_remST_remREDUND_adonis_table,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Adonis_MNAN_HNAN_SS_perday_only.png"
)
## [1] "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Adonis_MNAN_HNAN_SS_perday_only.png"
#OG Functional (abundance) Model Tables
save_as_image(
  functional_prcomp_grp_remST_remREDUND_envfit_table,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Envfit_MNAN_HNAN_ANAN_PNAN_perML.png"
)
## [1] "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Envfit_MNAN_HNAN_ANAN_PNAN_perML.png"
save_as_image(
  functional_prcomp_grp_remST_remREDUND_adonis_table,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Adonis_MNAN_HNAN_ANAN_PNAN_perML.png"
)
## [1] "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Adonis_MNAN_HNAN_ANAN_PNAN_perML.png"
#All activity and abundance measures w/o bact in carbon based measures
save_as_image(
  ant_community_final_PP_carbon_based_WoBact_envfit_table,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Envfit_All_response_C.png"
)
## [1] "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Envfit_All_response_C.png"
save_as_image(
  ant_community_final_PP_carbon_based_WoBact_adonis_table,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Adonis_All_response_C.png"
)
## [1] "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Adonis_All_response_C.png"
#Mixo all activity - ant_community_final_PP_functional_mixo_only_fit - mixo only PP and feeding fit with all env vars - non carbon feeding (Mixo PP, mixo maybe PP, and MNAN SS perday)
save_as_image(
  ant_community_final_PP_activity_mixo_only_envfit_table,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Envfit_mixo_only_activity_noC.png"
)
## [1] "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Envfit_mixo_only_activity_noC.png"
save_as_image(
  ant_community_final_PP_activity_mixo_only_adonis_table,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Adonis_mixo_only_activity_noC.png"
)
## [1] "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Adonis_mixo_only_activity_noC.png"
#All PP only activity PNAN and MNAN in Carbon based metrics
save_as_image(
  ant_community_final_PP_functional_envfit_table,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Envfit_All_PP_only.png"
)
## [1] "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Envfit_All_PP_only.png"
save_as_image(
  ant_community_final_PP_functional_adonis_table,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Adonis_All_PP_only.png"
)
## [1] "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Adonis_All_PP_only.png"
#Mixo all activity and abundance measures MNAN/HNAN/PNAN per ml and activity
save_as_image(
  ant_community_final_PP_abund_act_prcomp_envfit_table,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Envfit_All_activity_abund_noC.png"
)
## [1] "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Envfit_All_activity_abund_noC.png"
save_as_image(
  ant_community_final_PP_abund_act_prcomp_adonis_table,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Adonis_All_activity_abund_noC.png"
)
## [1] "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Adonis_All_activity_abund_noC.png"
#All functional measures with all env vars but not C based feeding
#ant_community_final_PP_functional_all_fit (Mixo PP, mixo?PP, PNAN PP, HNAN SS perday and MNAN SS perday)
save_as_image(
  ant_community_final_PP_functional_all_envfit_table,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Envfit_All_activity_noC.png"
)
## [1] "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Envfit_All_activity_noC.png"
save_as_image(
  ant_community_final_PP_functional_all_adonis_table,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Adonis_All_activity_noC.png"
)
## [1] "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Adonis_All_activity_noC.png"
# Make and save the Carbon Removed and PP ggplots
# Station/depth filters used further down:
#filter(ant_community_final_PP_calc, !station %in% c("N", "O","P","Q"), !depth == "Bucket")
#   -> use this for non-percent read data

#filter(ant_community_final_PP_calc, !station %in% c("N", "O","P","Q","A"), !depth == "Bucket")
#   -> use for percent read data

# Version of the calculation that still has station A in it (no station filter
# is applied here; the filtering happens when plotting).
# NOTE(review): the numeric constants (0.045, 2.2*10^-13, 1500, 24, ...) are
# conversion factors whose units are not documented in this section.
ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA <- mutate(
  ant_community_final_PP_calc,
  # bacterial biomass and carbon per mL
  bac_biomass_avg = 0.045 * Bact_mL,
  Carbon_biomass_perML = bac_biomass_avg * (2.2 * 10^-13),
  # carbon removed by MNAN / HNAN, derived from the SS-per-day rates
  MNAN_C_removed = Carbon_biomass_perML * 1000 * MNAN_SS_perday * 1000000 / 100,
  HNAN_C_removed = Carbon_biomass_perML * 1000 * HNAN_SS_perday * 1000000 / 100,
  # per-cell versions
  MNAN_C_removed_percell = MNAN_C_removed / (MNAN_mL * 1000),
  HNAN_C_removed_percell = HNAN_C_removed / (HNAN_mL * 1000),
  Mixo_PP_percell = Mixo_PP / (MNAN_mL * 1000),
  # carbon removed relative to bprod, in percent
  MNAN_percent_bprod_removed = MNAN_C_removed / (bprod * 24 / 1000000 * 1500) * 100,
  HNAN_percent_bprod_removed = HNAN_C_removed / (bprod * 24 / 1000000 * 1500) * 100,
  # split of the total carbon removed by the hetero share of the *_act columns
  hetero_act_percent_forfeed = hetero_act / (hetero_act + mixo_act + mixo_maybe_act),
  hetero_act_feeding_C_redo = (MNAN_C_removed + HNAN_C_removed) * hetero_act_percent_forfeed,
  mixo_act_feeding_redo = (MNAN_C_removed + HNAN_C_removed) * (1 - hetero_act_percent_forfeed),
  hetero_act_feeding_C_percent_bprod = hetero_act_feeding_C_redo / (bprod * 24 / 1000000 * 1500) * 100,
  mixo_act_feeding_C_percent_bprod = mixo_act_feeding_redo / (bprod * 24 / 1000000 * 1500) * 100,
  # per-cell primary production
  MNAN_C_PP_percell_pot = Mixo_PP_potential / (MNAN_mL * 1000),
  PNAN_C_PP_percell = Phyto_PP / (PNAN_mL * 1000)
)

# Box plots of carbon removed per station, filled by depth. Stations N, O, P, Q
# and the "Bucket" depth are left out (station A is kept).
MNAN_C_removed_plot <- ggplot(
  data = filter(
    ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA,
    !(station %in% c("N", "O", "P", "Q")),
    depth != "Bucket"
  )
) +
  geom_boxplot(aes(x = station, y = MNAN_C_removed, fill = depth)) +
  labs(x = "", y = "", fill = "Depth") +
  scale_fill_manual(values = c("blue3", "green4")) +
  theme_bw()

HNAN_C_removed_plot <- ggplot(
  data = filter(
    ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA,
    !(station %in% c("N", "O", "P", "Q")),
    depth != "Bucket"
  )
) +
  geom_boxplot(aes(x = station, y = HNAN_C_removed, fill = depth)) +
  labs(x = "", y = "", fill = "Depth") +
  scale_fill_manual(values = c("blue3", "green4")) +
  theme_bw()

# Stack the two plots (HNAN on top, MNAN below) as panels C and D, no legend.

Bacterivory_Cbased_ggarrange <- ggarrange(
  HNAN_C_removed_plot,
  MNAN_C_removed_plot,
  ncol = 1,
  labels = c("C", "D"),
  common.legend = TRUE,
  legend = "none"
)

# Same figure with a shared y-axis label on the left.
Bacterivory_Cbased_ggarrange1 <- annotate_figure(
  Bacterivory_Cbased_ggarrange,
  left = text_grob("ugrams C L"^-1~"day"^-1, rot = 90, size = 10)
)


# Percent-read based versions of the carbon-removed box plots. Station A is
# excluded here in addition to N, O, P, Q and the "Bucket" depth.
MNAN_C_removed_percentRead_plot <- ggplot(
  data = filter(
    ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA,
    !(station %in% c("N", "O", "P", "Q", "A")),
    depth != "Bucket"
  )
) +
  geom_boxplot(aes(x = station, y = mixo_act_feeding_redo, fill = depth)) +
  labs(x = "", y = "", fill = "Depth") +
  scale_fill_manual(values = c("blue3", "green4")) +
  theme_bw()

HNAN_C_removed_percentRead_plot <- ggplot(
  data = filter(
    ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA,
    !(station %in% c("N", "O", "P", "Q", "A")),
    depth != "Bucket"
  )
) +
  geom_boxplot(aes(x = station, y = hetero_act_feeding_C_redo, fill = depth)) +
  labs(x = "", y = "", fill = "Depth") +
  scale_fill_manual(values = c("blue3", "green4")) +
  theme_bw()


# Stack the two plots (HNAN on top, MNAN below) as panels E and F, no legend.
Bacterivory_Cbased_percentRead_ggarrange <- ggarrange(
  HNAN_C_removed_percentRead_plot,
  MNAN_C_removed_percentRead_plot,
  ncol = 1,
  labels = c("E", "F"),
  common.legend = TRUE,
  legend = "none"
)

# Same figure with a shared y-axis label on the left.
Bacterivory_Cbased_percentRead_ggarrange1 <- annotate_figure(
  Bacterivory_Cbased_percentRead_ggarrange,
  left = text_grob("ugrams C L"^-1~"day"^-1, rot = 90, size = 10)
)

# Primary production panels (G, H), built from plots created outside this
# section:
#MNAN_PP_potential_plot
#PNAN_PP_plot
PP_distribution_ggarrange <- ggarrange(
  PNAN_PP_plot,
  MNAN_PP_potential_plot,
  ncol = 1,
  labels = c("G", "H"),
  common.legend = TRUE,
  legend = "none"
)

# The "arrange1" objects carry the extra axis annotation; the plain "arrange"
# objects (no annotation) are the ones combined below.
#All_activity_ggarrange <- ggarrange(Bacterivory_ggarrange1, Bacterivory_Cbased_ggarrange1, Bacterivory_Cbased_percentRead_ggarrange, PP_distribution_ggarrange, common.legend = TRUE, legend = "bottom", nrow = 1, legend.grob = get_legend(HNAN_C_removed_percentRead_plot+theme(legend.position = "bottom")))

# All four two-panel columns side by side; the shared legend is taken from
# HNAN_C_removed_percentRead_plot and placed at the bottom.
All_activity_ggarrange <- ggarrange(
  Bacterivory_ggarrange,
  Bacterivory_Cbased_ggarrange,
  Bacterivory_Cbased_percentRead_ggarrange,
  PP_distribution_ggarrange,
  nrow = 1,
  common.legend = TRUE,
  legend = "bottom",
  legend.grob = get_legend(
    HNAN_C_removed_percentRead_plot + theme(legend.position = "bottom")
  )
)

# Shared "Station" x-axis label underneath the combined figure.
All_activity_ggarrange1 <- annotate_figure(
  All_activity_ggarrange,
  bottom = text_grob("Station", vjust = -4.5, hjust = 0.2)
)

All_activity_ggarrange1

ggsave(
  "ALL_activity_boxplot.pdf",
  plot = All_activity_ggarrange1,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/",
  height = 6,
  width = 8
)

# Rearranging the plots for a better story: the 8 panels are split into
# separate figures.
# - Main figure: all mixotrophic (MNAN) measures.
# - Supplementary figure: all HNAN/PNAN activity measures.
# All of the plots are made here except the 2 SS per day plots:
# HNAN_SS_perday_plot_final1, MNAN_SS_perday_plot_final1.
# None of the ggsave() calls below sets width/height, so the output size is
# ggsave's default (reported as 7 x 5 in the recorded output).

MNAN_only_activity_ggarrange <- ggarrange(
  MNAN_C_removed_plot,
  MNAN_C_removed_percentRead_plot,
  MNAN_PP_potential_plot,
  ncol = 3,
  nrow = 1,
  common.legend = TRUE,
  legend = "bottom"
)
ggsave(
  "MNAN_only_activity_ggarrange.pdf",
  plot = MNAN_only_activity_ggarrange,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image

HNAN_PNAN_activity_ggarrange <- ggarrange(
  HNAN_C_removed_plot,
  HNAN_C_removed_percentRead_plot,
  PNAN_PP_plot,
  nrow = 1,
  ncol = 3,
  common.legend = TRUE,
  legend = "bottom"
)
# These both look good, they just need saving for touch-up in Illustrator.
ggsave(
  "HNAN_PNAN_activity_ggarrange.pdf",
  plot = HNAN_PNAN_activity_ggarrange,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image

# Standing stock removed per day, MNAN next to HNAN.
Standing_stock_removed_ggarrange <- ggarrange(
  MNAN_SS_perday_plot_final1,
  HNAN_SS_perday_plot_final1,
  ncol = 2,
  nrow = 1,
  common.legend = TRUE,
  legend = "bottom"
)
ggsave(
  "Standing_stock_removed_ggarrange.pdf",
  plot = Standing_stock_removed_ggarrange,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image

# Splitting HNAN and PNAN activity into separate figures as they really
# shouldn't be compared side by side.
HNAN_activity_ggarrange <- ggarrange(
  HNAN_C_removed_plot,
  HNAN_C_removed_percentRead_plot,
  nrow = 1,
  ncol = 2,
  common.legend = TRUE,
  legend = "bottom"
)
ggsave(
  "HNAN_activity_ggarrange.pdf",
  plot = HNAN_activity_ggarrange,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image

# PNAN ggsave
ggsave(
  "PNAN_PP_plot.pdf",
  plot = PNAN_PP_plot,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
# Looking at the % of bacterial production consumed by each group.
# Plotting subset: stations N, O, P, Q and the "Bucket" depth are excluded.
bprod_plot_data <- filter(
  ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA,
  !station %in% c("N", "O", "P", "Q"),
  depth != "Bucket"
)
# Shared styling for the boxplots (fill legend) ...
bprod_box_layers <- list(
  ylab(""),
  labs(fill = "Depth"),
  xlab(""),
  scale_fill_manual(values = c("blue3", "green4")),
  theme_bw()
)
# ... and for the point plots (colour legend).
bprod_point_layers <- list(
  ylab(""),
  labs(color = "Depth"),
  xlab(""),
  scale_color_manual(values = c("blue3", "green4")),
  theme_bw()
)

ggplot(bprod_plot_data) +
  geom_boxplot(aes(x = station, y = hetero_act_feeding_C_percent_bprod, fill = depth)) +
  bprod_box_layers

ggplot(bprod_plot_data) +
  geom_boxplot(aes(x = station, y = HNAN_percent_bprod_removed, fill = depth)) +
  bprod_box_layers

ggplot(bprod_plot_data) +
  geom_boxplot(aes(x = station, y = mixo_act_feeding_C_percent_bprod, fill = depth)) +
  bprod_box_layers

ggplot(bprod_plot_data) +
  geom_boxplot(aes(x = station, y = MNAN_percent_bprod_removed, fill = depth)) +
  bprod_box_layers

# Total bacterial production and primary production per station (points).
ggplot(bprod_plot_data) +
  geom_point(aes(x = station, y = bprod, color = depth)) +
  bprod_point_layers

ggplot(bprod_plot_data) +
  geom_point(aes(x = station, y = pprod_PAR, color = depth)) +
  bprod_point_layers
## Warning: Removed 5 rows containing missing values (`geom_point()`).

# Side-by-side view of three ordination plots built earlier in the file:
#   ALL_abundance_PCA_plot            - abundance plot
#   Primary_Production_all_prcmp_plot - activity plot
#   ALL_response_Wo_bact_Cbased_plot
ggarrange(
  ALL_abundance_PCA_plot,
  Primary_Production_all_prcmp_plot,
  ALL_response_Wo_bact_Cbased_plot,
  common.legend = TRUE,
  legend = "bottom",
  nrow = 1
)

#cbind(ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA$station, ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA$Mixo_PP, ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA$MNAN_C_removed)

# Mixotroph (MNAN) carbon budget per Group / station / depth.
# For each group the replicate means of Mixo_PP and MNAN_C_removed are summed
# into a total budget, and each term is expressed as a percentage of that total.
#
# Changes relative to the previous version (the resulting table is the same):
# - `Group` is selected explicitly instead of relying on select() silently
#   re-adding the missing grouping variable.
# - `.groups = "drop_last"` states the grouping that summarise() previously
#   applied by default (result stays grouped by Group, station).
# - The two sd() summaries were removed: they were dropped by the final
#   select() and never reached the output.
# mean() is called without na.rm, so a group containing an NA replicate yields
# NA percentages (these show up as "removed rows" when the table is plotted).
Mixo_percent_carbon_PPandHeterodf <-
  ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA %>%
  select(Group, depth, station, Mixo_PP, MNAN_C_removed) %>%
  group_by(Group, station, depth) %>%
  summarise(
    Mixo_PP_avg = mean(Mixo_PP),
    MNAN_C_removed_avg = mean(MNAN_C_removed),
    .groups = "drop_last"
  ) %>%
  mutate(
    total_MNAN_carbon_budget = Mixo_PP_avg + MNAN_C_removed_avg,
    Primary_Production_Percent = Mixo_PP_avg / total_MNAN_carbon_budget * 100,
    Bacterivory_Percent = MNAN_C_removed_avg / total_MNAN_carbon_budget * 100
  ) %>%
  select(
    Group,
    station,
    depth,
    Primary_Production_Percent,
    Bacterivory_Percent,
    total_MNAN_carbon_budget
  )
# Point plots of the two carbon-budget percentages per station, coloured by
# depth. Stations N, O, P, Q and the "Bucket" depth are excluded.
carbonBudget_plot_data <- filter(
  Mixo_percent_carbon_PPandHeterodf,
  !station %in% c("N", "O", "P", "Q"),
  depth != "Bucket"
)

Percent_PP_carbonBudget <- ggplot(carbonBudget_plot_data) +
  geom_point(aes(x = station, y = Primary_Production_Percent, color = depth))

Percent_bacterivory_carbonBudget <- ggplot(carbonBudget_plot_data) +
  geom_point(aes(x = station, y = Bacterivory_Percent, color = depth))

# Saving the plots for presentation (default ggsave size).
ggsave(
  "Percent_PP_carbonBudget.pdf",
  plot = Percent_PP_carbonBudget,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
## Warning: Removed 2 rows containing missing values (`geom_point()`).
ggsave(
  "Percent_bacterivory_carbonBudget.pdf",
  plot = Percent_bacterivory_carbonBudget,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
## Warning: Removed 2 rows containing missing values (`geom_point()`).
# Carbon-budget table without stations N, O, P, Q, A and without the "Bucket"
# depth. The filtered table is built once and reused for the flextable image
# and for the depth / region subsets below.
Mixo_percent_carbon_PPandHeterodf_noredun <- filter(
  Mixo_percent_carbon_PPandHeterodf,
  !station %in% c("N", "O", "P", "Q", "A"),
  depth != "Bucket"
)

Mixo_percent_carbon_PPandHeterodf_table <- flextable(
  Mixo_percent_carbon_PPandHeterodf_noredun
)

save_as_image(
  Mixo_percent_carbon_PPandHeterodf_table,
  "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Mixo_carbon_percent_table.png"
)
## [1] "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Mixo_carbon_percent_table.png"

# Subsets by depth ...
DCM_filter <- filter(Mixo_percent_carbon_PPandHeterodf_noredun, depth == "DCM")
Surf_filter <- filter(Mixo_percent_carbon_PPandHeterodf_noredun, depth == "Surf")
# ... and by region. NOTE(review): "Maguerite" must match the spelling stored
# in the Group column exactly - confirm against the data before "correcting" it.
North_filter <- filter(Mixo_percent_carbon_PPandHeterodf_noredun, Group != "Maguerite")
South_filter <- filter(Mixo_percent_carbon_PPandHeterodf_noredun, Group == "Maguerite")
# Two-panel correlation figure (A: responses, B: env-to-env matrix).
# Related objects from earlier in the file: env_to_env_correlationmatrix,
# All_responses_correlation_plot.

Correlation_figure_arrange <- ggarrange(
  All_responses_correlation_plot_test,
  env_to_env_correlationmatrix_remREDUND,
  common.legend = TRUE,
  legend = "bottom",
  labels = c("A", "B")
)

Correlation_figure_arrange

ggsave(
  "Correlation_figure_arrange.pdf",
  plot = Correlation_figure_arrange,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/",
  height = 5,
  width = 7
)

To clean up my code, I really should write a plotting function for my multidimensional analyses.

ALL non-nutrient/environmental data should be redone using NMDS analysis and NOT using prcomp. Prcomp assumes homogeneity and homoscedasticity, which is acceptable for scaled and centered environmental data but not something we would expect when looking at response data. NMDS is preferred and more widely accepted; however, trends usually remain consistent between the two methods. Also, the plotting and data would be more consistent, as we are using vegan functions for constraining our analyses. After combining all of my math and datasets I will do this for the final plots that will be used in the paper. Use the function metaMDS() for the NMDS analysis.

May NOT actually need to redo this portion. ONLY if reviewers ask for it.

Stick with hourly rates and not 24-hour rates. I may need to adjust my calculations, as I think most if not all of my analyses are done over a 24-hour period. NEED to check this.

Recalculate as a % of bacterial production to test the assumptions of both methods

Right now, % reads is an underestimation and beads would be an overestimation.

Present both methods in the paper, and present them as upper and lower limits for modelers to use in their estimations.

Questions to address in paper:

- What is the distribution of mixotrophy (abundance) on a latitudinal gradient?
- What is the distribution of mixotrophy (abundance) on a bathymetric gradient?
- What is the distribution of mixotrophic activity (PP and bacterivory) along these gradients? Is it the same as the estimated abundance?
- What are the environmental variables that influence mixotrophic activity and abundance (measures of bacterivory, PP, and population density)?
- Translate these to measures of carbon, either taken up through PP or consumed via bacterivory. Do these measures remain consistent? (This may be removed if all analyses are done using carbon.)
- What is the role and importance of mixotrophy in the context of the greater carbon cycle in WAP Southern Ocean waters?
- Present a new method for estimating PP and bacterivory using RNA read numbers as a proxy for activity, and present a range of inputs (min and max) for mixotrophic C production/consumption.

# summarise_if - looped summary for all columns that are numeric (takes the mean).
# Example run from earlier:
# dplyr::group_by(functional_reads, station, depth) %>% summarise_if(is.numeric, mean, na.rm = TRUE)

# I will summarise the dataset that has all of the calculations at all the
# stations. This comes from the chunk for Distribution of Activity Figure - SS perday.

# I will then repeat the model for just the activity data and repeat the envfit
# and adonis model selection for those as well. NO need to repeat any other PCA
# at the moment.

# Fix the NaNs and Inf in the per-cell rate columns by setting them to 0.
# is.nan()/is.infinite() test the numeric values directly; the previous
# comparison against the strings "NaN"/"Inf" only worked through implicit
# numeric-to-character coercion. is.infinite() also catches -Inf, which the
# string comparison against "Inf" did not.
ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA$MNAN_C_removed_percell[
  is.nan(ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA$MNAN_C_removed_percell)
] <- 0

ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA$MNAN_C_PP_percell_pot[
  is.nan(ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA$MNAN_C_PP_percell_pot)
] <- 0

ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA$MNAN_C_PP_percell_pot[
  is.infinite(ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA$MNAN_C_PP_percell_pot)
] <- 0

# Replicate means of every numeric column per Group / station / depth
# (stations N, O, P, Q and the "Bucket" depth excluded).
# BUG FIX: the argument was spelled `rm.na`, which mean() silently swallows
# through `...`, so NAs were never removed. `na.rm = TRUE` is what was intended.
ant_community_final_PP_calc_grp <-
  ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA %>%
  filter(!station %in% c("N", "O", "P", "Q"), depth != "Bucket") %>%
  group_by(Group, station, depth) %>%
  summarise_if(is.numeric, mean, na.rm = TRUE)

# The object is deliberately left as a tibble: later code relies on
# `[rows, "col"]` returning a one-column tibble. Setting row names on a tibble
# is deprecated, hence the warning below.
rownames(ant_community_final_PP_calc_grp) <- grouped_rownames_remST
## Warning: Setting row names on a tibble is deprecated.
# Environmental data frame for the grouped activity analyses: start from the
# grouped means and drop the listed columns / column ranges.
ant_community_final_PP_functional_env_grp <- ant_community_final_PP_calc_grp %>%
  select(
    -(Lat_deg:Long),
    -(waterT2:conductivity2),
    -oxygen2,
    -PAR2,
    -salinity2,
    -(Time_feeding:HNAN_mL),
    -(usphere_mL:mixo_act_feeding_C_percent_bprod),
    -(Total_act:Phyto_PP),
    -(pprod_Sun:Pprod_PAR_20_stdev),
    -MNAN_C_PP_percell_pot,
    -PNAN_C_PP_percell
  )
# This still has some redundant vars; remove every column named in
# redundant_vars_vector.
env_grp_cols_to_keep <-
  !colnames(ant_community_final_PP_functional_env_grp) %in% redundant_vars_vector
ant_community_final_PP_functional_env_grp <-
  ant_community_final_PP_functional_env_grp[, env_grp_cols_to_keep]

# Station A was removed from the PCA dataset, so do the same to the env df.
# NOTE(review): this assumes the two station A rows are rows 1-2 of the
# 18-row grouped table - confirm the row order.
ant_community_final_PP_functional_env_grp <-
  ant_community_final_PP_functional_env_grp[3:18, ]

# Select the four activity responses for the PCA analysis.
ant_community_final_PP_calc_grp_activity_only <- ant_community_final_PP_calc_grp[, c(
  "MNAN_C_removed",
  "HNAN_C_removed",
  "Mixo_PP_potential",
  "Phyto_PP"
)]

rownames(ant_community_final_PP_calc_grp_activity_only) <- grouped_rownames_remST
## Warning: Setting row names on a tibble is deprecated.
# Remove Station A here (rows 1-2), then re-apply the matching row names.
ant_community_final_PP_calc_grp_activity_only <-
  ant_community_final_PP_calc_grp_activity_only[3:18, ]

rownames(ant_community_final_PP_calc_grp_activity_only) <- grouped_rownames_remST[3:18]
## Warning: Setting row names on a tibble is deprecated.

# PCA on centred, unit-variance variables (`scale.` spelled out in full).
ant_community_final_PP_calc_grp_activity_only_prcomp <- prcomp(
  ant_community_final_PP_calc_grp_activity_only,
  center = TRUE,
  scale. = TRUE
)

# Envfit of every env variable.
# NOTE(review): the left-hand side is the raw activity table, not the prcomp
# object, so envfit() presumably fits against the first two raw columns rather
# than PC1/PC2 - confirm this is intended before overlaying on the PCA plot.
ant_community_final_PP_calc_grp_activity_only_envfit <- envfit(
  ant_community_final_PP_calc_grp_activity_only ~ .,
  ant_community_final_PP_functional_env_grp,
  perm = 999
)
# Important Vars: "ice", "ZML", "latitude", "NO2_NO3", "bprod"

# Model Selection on Euclidean distances of the scaled activity table.
ant_community_final_PP_functional_MoSelNvar <- AICc.table.Nvar(
  c("ice", "ZML", "latitude", "NO2_NO3", "bprod"),
  matrix.char = vegdist(scale(ant_community_final_PP_calc_grp_activity_only), "euclidean"),
  perm = 999,
  n.var = 3,
  method = "euclidean",
  df = ant_community_final_PP_functional_env_grp
)

ant_community_final_PP_functional_MoSelALL <- AICc.table.all(
  c("ice", "ZML", "latitude", "NO2_NO3", "bprod"),
  matrix.char = vegdist(scale(ant_community_final_PP_calc_grp_activity_only), "euclidean"),
  perm = 999,
  method = "euclidean",
  df = ant_community_final_PP_functional_env_grp,
  comb.incl = c(1, 2, 3)
)
## [1] 1
## [1] 2
## [1] 3
# PCA plot of the grouped activity data with selected envfit vectors ----
# Site scores and (1.5x stretched) variable loadings of the activity-only PCA.
ant_community_final_PP_calc_grp_activity_only_prcomp_scores <- as.data.frame(
  scores(ant_community_final_PP_calc_grp_activity_only_prcomp)
)
ant_community_final_PP_calc_grp_activity_only_prcomp_species <- as.data.frame(
  scores(ant_community_final_PP_calc_grp_activity_only_prcomp, display = "species")
) * 1.5
# Append depth and Group as columns 5 and 6. The right-hand sides are
# one-column tibbles, so the new columns take the names "depth" and "Group",
# which the plot aesthetics below rely on.
ant_community_final_PP_calc_grp_activity_only_prcomp_scores[, 5] <-
  ant_community_final_PP_calc_grp[3:18, "depth"]
ant_community_final_PP_calc_grp_activity_only_prcomp_scores[, 6] <-
  ant_community_final_PP_calc_grp[3:18, "Group"]

# envfit vector scores, stretched 2x for display.
ant_community_final_PP_calc_grp_activity_only_envfit_scores <- as.data.frame(
  scores(ant_community_final_PP_calc_grp_activity_only_envfit, "vectors")
) * 2

# Hand-picked vectors to draw (row positions in the envfit score table),
# collected into one data frame so aes() refers to columns instead of
# indexing an external object.
ALL_activity_envfit_selected <-
  ant_community_final_PP_calc_grp_activity_only_envfit_scores[c(2, 5, 6, 9:11, 13:15, 19, 20), ]
ALL_activity_envfit_arrows <- data.frame(
  x = ALL_activity_envfit_selected[, 1],
  y = ALL_activity_envfit_selected[, 2],
  label = rownames(ALL_activity_envfit_selected)
)

# Percent variance explained per axis, computed from the PCA itself so the
# axis titles cannot go stale when the input data change.
ALL_activity_pca_var_pct <- round(
  100 * ant_community_final_PP_calc_grp_activity_only_prcomp$sdev^2 /
    sum(ant_community_final_PP_calc_grp_activity_only_prcomp$sdev^2)
)

ALL_acivity_PCA_grouped_plot <- ggplot(
  data = ant_community_final_PP_calc_grp_activity_only_prcomp_scores,
  aes(x = PC1, y = PC2)
) +
  geom_point(aes(color = Group, shape = depth), size = 2) +
  geom_segment(
    data = ALL_activity_envfit_arrows,
    aes(x = 0, y = 0, xend = x, yend = y),
    arrow = arrow(length = unit(0.1, "inches"))
  ) +
  # Vector labels sit slightly below the arrow heads.
  geom_text_repel(
    data = ALL_activity_envfit_arrows,
    aes(x = x, y = y - 0.04, label = label),
    fontface = "bold"
  ) +
  # Sample labels, nudged below their points.
  geom_text(
    aes(x = PC1, y = PC2 - 0.2),
    size = 3,
    label = rownames(ant_community_final_PP_calc_grp_activity_only_prcomp_scores),
    color = "grey3"
  ) +
  xlab(paste0("PC1 (", ALL_activity_pca_var_pct[1], "%)")) +
  ylab(paste0("PC2 (", ALL_activity_pca_var_pct[2], "%)")) +
  theme_bw() +
  theme(panel.grid = element_blank())

ALL_acivity_PCA_grouped_plot

ggsave(
  "ALL_acivity_PCA_grouped_plot.pdf",
  plot = ALL_acivity_PCA_grouped_plot,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
#Without LAT LONG------
# Same activity PCA, but the envfit is repeated on the env table without
# latitude and longitude, and the PCA loadings are drawn in red.
ant_community_final_PP_functional_env_grp_noLATLONG <- select(
  ant_community_final_PP_functional_env_grp,
  -latitude,
  -longitude
)

# NOTE(review): as in the envfit with lat/long, the left-hand side is the raw
# activity table rather than the prcomp object - confirm this is intended.
ant_community_final_PP_calc_grp_activity_only_noLATLONG_envfit <- envfit(
  ant_community_final_PP_calc_grp_activity_only ~ .,
  ant_community_final_PP_functional_env_grp_noLATLONG,
  perm = 999
)

# envfit vector scores, stretched 2x for display.
ant_community_final_PP_calc_grp_activity_only_noLATLONG_envfit_scores <- as.data.frame(
  scores(ant_community_final_PP_calc_grp_activity_only_noLATLONG_envfit, "vectors")
) * 2

# Hand-picked vectors to draw (row positions in the envfit score table),
# collected into one data frame so aes() refers to columns instead of
# indexing an external object.
noLATLONG_activity_envfit_selected <-
  ant_community_final_PP_calc_grp_activity_only_noLATLONG_envfit_scores[c(2, 5, 6, 9, 13, 16, 18), ]
noLATLONG_activity_envfit_arrows <- data.frame(
  x = noLATLONG_activity_envfit_selected[, 1],
  y = noLATLONG_activity_envfit_selected[, 2],
  label = rownames(noLATLONG_activity_envfit_selected)
)

# Percent variance explained per axis, computed from the PCA instead of being
# typed in by hand (see summary(ant_community_final_PP_calc_grp_activity_only_prcomp)).
noLATLONG_activity_pca_var_pct <- round(
  100 * ant_community_final_PP_calc_grp_activity_only_prcomp$sdev^2 /
    sum(ant_community_final_PP_calc_grp_activity_only_prcomp$sdev^2)
)

ALL_acivity_PCA_grouped_plot_noLATLONG <- ggplot(
  data = ant_community_final_PP_calc_grp_activity_only_prcomp_scores,
  aes(x = PC1, y = PC2)
) +
  geom_point(aes(color = Group, shape = depth), size = 2) +
  geom_segment(
    data = noLATLONG_activity_envfit_arrows,
    aes(x = 0, y = 0, xend = x, yend = y),
    arrow = arrow(length = unit(0.1, "inches"))
  ) +
  geom_text_repel(
    data = noLATLONG_activity_envfit_arrows,
    aes(x = x, y = y - 0.04, label = label),
    fontface = "bold"
  ) +
  # Sample labels, nudged below their points.
  geom_text(
    aes(x = PC1, y = PC2 - 0.2),
    size = 3,
    label = rownames(ant_community_final_PP_calc_grp_activity_only_prcomp_scores),
    color = "grey3"
  ) +
  xlab(paste0("PC1 (", noLATLONG_activity_pca_var_pct[1], "%)")) +
  ylab(paste0("PC2 (", noLATLONG_activity_pca_var_pct[2], "%)")) +
  theme_bw() +
  theme(panel.grid = element_blank()) +
  # PCA variable loadings in red.
  geom_segment(
    data = ant_community_final_PP_calc_grp_activity_only_prcomp_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "red"
  ) +
  geom_text(
    data = ant_community_final_PP_calc_grp_activity_only_prcomp_species,
    aes(x = PC1, y = PC2),
    color = "red",
    label = rownames(ant_community_final_PP_calc_grp_activity_only_prcomp_species)
  )

ALL_acivity_PCA_grouped_plot_noLATLONG

ggsave(
  "ALL_acivity_PCA_grouped_plot_noLATLONG.pdf",
  plot = ALL_acivity_PCA_grouped_plot_noLATLONG,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
# Use to inspect the % variance explained:
#summary(ant_community_final_PP_calc_grp_activity_only_prcomp)

#Need to test to see how it looks with env vars with the uncombined calculation of Mixo PP
#ant_community_final_PP_calc_grp_activity_only_Mixo_split <- ant_community_final_PP_calc_grp[, c("MNAN_C_removed", "HNAN_C_removed", "Mixo_PP", "Mixo_maybe_PP", "Phyto_PP")] 

#rownames(ant_community_final_PP_calc_grp_activity_only_Mixo_split) <- grouped_rownames_remST

#ant_community_final_PP_calc_grp_activity_only_Mixo_split <- ant_community_final_PP_calc_grp_activity_only_Mixo_split[3:18,]

#rownames(ant_community_final_PP_calc_grp_activity_only_Mixo_split) <- grouped_rownames_remST[3:18]

#ant_community_final_PP_calc_grp_activity_only_Mixo_split_prcomp <- prcomp(ant_community_final_PP_calc_grp_activity_only_Mixo_split, center = TRUE, scale = TRUE)

#ant_community_final_PP_calc_grp_activity_only_Mixo_split_envfit <- envfit(ant_community_final_PP_calc_grp_activity_only_Mixo_split_prcomp ~ ., ant_community_final_PP_functional_env_grp, perm = 999)
#Just makes more vars that are correlated with latitude show up as significant. Not useful and actively makes it worse.

#Grouped Bacterivory only C based no LAT LONG--------
# Two-variable response table: carbon removed by MNAN and by HNAN.
ant_community_final_PP_calc_grp_Cbased_bacterivory_only <-
  ant_community_final_PP_calc_grp[, c("MNAN_C_removed", "HNAN_C_removed")]

rownames(ant_community_final_PP_calc_grp_Cbased_bacterivory_only) <- grouped_rownames_remST
## Warning: Setting row names on a tibble is deprecated.
# Drop station A (rows 1-2), then re-apply the matching row names.
ant_community_final_PP_calc_grp_Cbased_bacterivory_only <-
  ant_community_final_PP_calc_grp_Cbased_bacterivory_only[3:18, ]

rownames(ant_community_final_PP_calc_grp_Cbased_bacterivory_only) <- grouped_rownames_remST[3:18]
## Warning: Setting row names on a tibble is deprecated.

# PCA on centred, unit-variance variables (`scale.` spelled out in full).
ant_community_final_PP_calc_grp_Cbased_bacterivory_only_prcomp <- prcomp(
  ant_community_final_PP_calc_grp_Cbased_bacterivory_only,
  center = TRUE,
  scale. = TRUE
)

# Envfit against the env table without latitude/longitude.
# NOTE(review): the left-hand side is the raw response table, not the prcomp
# object - confirm this is intended.
ant_community_final_PP_calc_grp_Cbased_bacterivory_only_noLATLONG_envfit <- envfit(
  ant_community_final_PP_calc_grp_Cbased_bacterivory_only ~ .,
  ant_community_final_PP_functional_env_grp_noLATLONG,
  perm = 999
)

# Extract scores: site scores, 1.5x stretched loadings, 2x stretched vectors.
ant_community_final_PP_calc_grp_Cbased_bacterivory_only_prcomp_scores <- as.data.frame(
  scores(ant_community_final_PP_calc_grp_Cbased_bacterivory_only_prcomp)
)
ant_community_final_PP_calc_grp_Cbased_bacterivory_only_prcomp_species <- as.data.frame(
  scores(ant_community_final_PP_calc_grp_Cbased_bacterivory_only_prcomp, display = "species")
) * 1.5
# Columns 3 and 4 take the names "depth" and "Group" from the one-column
# tibbles assigned to them.
ant_community_final_PP_calc_grp_Cbased_bacterivory_only_prcomp_scores[, 3] <-
  ant_community_final_PP_calc_grp[3:18, "depth"]
ant_community_final_PP_calc_grp_Cbased_bacterivory_only_prcomp_scores[, 4] <-
  ant_community_final_PP_calc_grp[3:18, "Group"]

ant_community_final_PP_calc_grp_Cbased_bacterivory_only_noLATLONG_envfit_scores <- as.data.frame(
  scores(ant_community_final_PP_calc_grp_Cbased_bacterivory_only_noLATLONG_envfit, "vectors")
) * 2
#Plot
# PCA of the two C-based bacterivory responses with selected envfit vectors
# (black arrows) and the PCA loadings (red).

# Hand-picked vectors to draw (row positions in the envfit score table),
# collected into one data frame so aes() refers to columns instead of
# indexing an external object.
Cbased_bacterivory_envfit_selected <-
  ant_community_final_PP_calc_grp_Cbased_bacterivory_only_noLATLONG_envfit_scores[c(2, 5, 6, 9, 10, 13, 16, 18), ]
Cbased_bacterivory_envfit_arrows <- data.frame(
  x = Cbased_bacterivory_envfit_selected[, 1],
  y = Cbased_bacterivory_envfit_selected[, 2],
  label = rownames(Cbased_bacterivory_envfit_selected)
)

# BUG FIX: the axis titles were hard-coded as "PC1 (51%)" / "PC2 (31%)", copied
# from the four-variable activity PCA. This PCA has only two variables, so its
# two axes must explain 100% between them. The percentages are now computed
# from this PCA's own standard deviations.
Cbased_bacterivory_pca_var_pct <- round(
  100 * ant_community_final_PP_calc_grp_Cbased_bacterivory_only_prcomp$sdev^2 /
    sum(ant_community_final_PP_calc_grp_Cbased_bacterivory_only_prcomp$sdev^2)
)

Cbased_bacterivory_PCA_grouped_plot_noLATLONG <- ggplot(
  data = ant_community_final_PP_calc_grp_Cbased_bacterivory_only_prcomp_scores,
  aes(x = PC1, y = PC2)
) +
  geom_point(aes(color = Group, shape = depth), size = 2) +
  geom_segment(
    data = Cbased_bacterivory_envfit_arrows,
    aes(x = 0, y = 0, xend = x, yend = y),
    arrow = arrow(length = unit(0.1, "inches"))
  ) +
  geom_text_repel(
    data = Cbased_bacterivory_envfit_arrows,
    aes(x = x, y = y - 0.04, label = label),
    fontface = "bold"
  ) +
  # Sample labels, nudged below their points.
  geom_text(
    aes(x = PC1, y = PC2 - 0.2),
    size = 3,
    label = rownames(ant_community_final_PP_calc_grp_Cbased_bacterivory_only_prcomp_scores),
    color = "grey3"
  ) +
  xlab(paste0("PC1 (", Cbased_bacterivory_pca_var_pct[1], "%)")) +
  ylab(paste0("PC2 (", Cbased_bacterivory_pca_var_pct[2], "%)")) +
  theme_bw() +
  theme(panel.grid = element_blank()) +
  # PCA variable loadings in red.
  geom_segment(
    data = ant_community_final_PP_calc_grp_Cbased_bacterivory_only_prcomp_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "red"
  ) +
  geom_text(
    data = ant_community_final_PP_calc_grp_Cbased_bacterivory_only_prcomp_species,
    aes(x = PC1, y = PC2),
    color = "red",
    label = rownames(ant_community_final_PP_calc_grp_Cbased_bacterivory_only_prcomp_species)
  )

Cbased_bacterivory_PCA_grouped_plot_noLATLONG

ggsave(
  "Cbased_bacterivory_PCA_grouped_plot_noLATLONG.pdf",
  plot = Cbased_bacterivory_PCA_grouped_plot_noLATLONG,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
#Group PP C based with no LAT LONG----------
# NEED to add adonis models if I want to keep these analyses.
# Two-variable response table: potential mixotroph PP and phytoplankton PP.
ant_community_final_PP_calc_grp_Cbased_PP_only <-
  ant_community_final_PP_calc_grp[, c("Mixo_PP_potential", "Phyto_PP")]

rownames(ant_community_final_PP_calc_grp_Cbased_PP_only) <- grouped_rownames_remST
## Warning: Setting row names on a tibble is deprecated.
# Drop station A (rows 1-2), then re-apply the matching row names.
ant_community_final_PP_calc_grp_Cbased_PP_only <-
  ant_community_final_PP_calc_grp_Cbased_PP_only[3:18, ]

rownames(ant_community_final_PP_calc_grp_Cbased_PP_only) <- grouped_rownames_remST[3:18]
## Warning: Setting row names on a tibble is deprecated.

# PCA on centred, unit-variance variables (`scale.` spelled out in full).
ant_community_final_PP_calc_grp_Cbased_PP_only_prcomp <- prcomp(
  ant_community_final_PP_calc_grp_Cbased_PP_only,
  center = TRUE,
  scale. = TRUE
)

# Envfit, using the same no-lat/long env dataset made earlier in the chunk.
# NOTE(review): the left-hand side is the raw response table, not the prcomp
# object - confirm this is intended.
ant_community_final_PP_calc_grp_Cbased_PP_only_noLATLONG_envfit <- envfit(
  ant_community_final_PP_calc_grp_Cbased_PP_only ~ .,
  ant_community_final_PP_functional_env_grp_noLATLONG,
  perm = 999
)

# Extract scores: site scores, 1.5x stretched loadings, 2x stretched vectors.
ant_community_final_PP_calc_grp_Cbased_PP_only_prcomp_scores <- as.data.frame(
  scores(ant_community_final_PP_calc_grp_Cbased_PP_only_prcomp)
)
ant_community_final_PP_calc_grp_Cbased_PP_only_prcomp_species <- as.data.frame(
  scores(ant_community_final_PP_calc_grp_Cbased_PP_only_prcomp, display = "species")
) * 1.5
# Columns 3 and 4 take the names "depth" and "Group" from the one-column
# tibbles assigned to them.
ant_community_final_PP_calc_grp_Cbased_PP_only_prcomp_scores[, 3] <-
  ant_community_final_PP_calc_grp[3:18, "depth"]
ant_community_final_PP_calc_grp_Cbased_PP_only_prcomp_scores[, 4] <-
  ant_community_final_PP_calc_grp[3:18, "Group"]

ant_community_final_PP_calc_grp_Cbased_PP_only_noLATLONG_envfit_scores <- as.data.frame(
  scores(ant_community_final_PP_calc_grp_Cbased_PP_only_noLATLONG_envfit, "vectors")
) * 2
#Plot
# PCA of the two C-based primary-production responses with selected envfit
# vectors (black arrows) and the PCA loadings (red).

# Hand-picked vectors to draw (row positions in the envfit score table),
# collected into one data frame so aes() refers to columns instead of
# indexing an external object.
Cbased_PP_envfit_selected <-
  ant_community_final_PP_calc_grp_Cbased_PP_only_noLATLONG_envfit_scores[c(2, 5, 6, 9, 10, 13, 16, 18), ]
Cbased_PP_envfit_arrows <- data.frame(
  x = Cbased_PP_envfit_selected[, 1],
  y = Cbased_PP_envfit_selected[, 2],
  label = rownames(Cbased_PP_envfit_selected)
)

# BUG FIX: the axis titles were hard-coded as "PC1 (51%)" / "PC2 (31%)", copied
# from the four-variable activity PCA. This PCA has only two variables, so its
# two axes must explain 100% between them. The percentages are now computed
# from this PCA's own standard deviations.
Cbased_PP_pca_var_pct <- round(
  100 * ant_community_final_PP_calc_grp_Cbased_PP_only_prcomp$sdev^2 /
    sum(ant_community_final_PP_calc_grp_Cbased_PP_only_prcomp$sdev^2)
)

Cbased_PP_PCA_grouped_plot_noLATLONG <- ggplot(
  data = ant_community_final_PP_calc_grp_Cbased_PP_only_prcomp_scores,
  aes(x = PC1, y = PC2)
) +
  geom_point(aes(color = Group, shape = depth), size = 2) +
  geom_segment(
    data = Cbased_PP_envfit_arrows,
    aes(x = 0, y = 0, xend = x, yend = y),
    arrow = arrow(length = unit(0.1, "inches"))
  ) +
  geom_text_repel(
    data = Cbased_PP_envfit_arrows,
    aes(x = x, y = y - 0.04, label = label),
    fontface = "bold"
  ) +
  # Sample labels, nudged below their points.
  geom_text(
    aes(x = PC1, y = PC2 - 0.2),
    size = 3,
    label = rownames(ant_community_final_PP_calc_grp_Cbased_PP_only_prcomp_scores),
    color = "grey3"
  ) +
  xlab(paste0("PC1 (", Cbased_PP_pca_var_pct[1], "%)")) +
  ylab(paste0("PC2 (", Cbased_PP_pca_var_pct[2], "%)")) +
  theme_bw() +
  theme(panel.grid = element_blank()) +
  # PCA variable loadings in red.
  geom_segment(
    data = ant_community_final_PP_calc_grp_Cbased_PP_only_prcomp_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "red"
  ) +
  geom_text(
    data = ant_community_final_PP_calc_grp_Cbased_PP_only_prcomp_species,
    aes(x = PC1, y = PC2),
    color = "red",
    label = rownames(ant_community_final_PP_calc_grp_Cbased_PP_only_prcomp_species)
  )

Cbased_PP_PCA_grouped_plot_noLATLONG

ggsave(
  "Cbased_PP_PCA_grouped_plot_noLATLONG.pdf",
  plot = Cbased_PP_PCA_grouped_plot_noLATLONG,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
#Adonis Models for no Lat Long analyses------
# Each response table is scaled, converted to Euclidean distances and passed to
# the AICc.table.* helpers (defined elsewhere) with a candidate variable set.
# The "## [1] n" lines are output printed by AICc.table.all().

# All abundance first
# env df          ant_community_final_calc_env_grp_remST_remREDUND_noLATLONG
# df              ant_community_final_functional_grp_fixed_remST
# Important vars  "ice", "NO2_NO3","Chla","Time","bprod","ZML","oxygen"
ant_community_final_PP_calc_grp_abundance_only_MoSelNvar <- AICc.table.Nvar(
  c("ice", "NO2_NO3", "Chla", "Time", "bprod", "ZML", "oxygen"),
  matrix.char = vegdist(scale(ant_community_final_functional_grp_fixed_remST), "euclidean"),
  perm = 999,
  n.var = 3,
  method = "euclidean",
  df = ant_community_final_calc_env_grp_remST_remREDUND_noLATLONG
)

ant_community_final_PP_calc_grp_abundance_only_MoSelALL <- AICc.table.all(
  c("ice", "NO2_NO3", "Chla", "Time", "bprod", "ZML", "oxygen"),
  matrix.char = vegdist(scale(ant_community_final_functional_grp_fixed_remST), "euclidean"),
  perm = 999,
  method = "euclidean",
  df = ant_community_final_calc_env_grp_remST_remREDUND_noLATLONG,
  comb.incl = c(1, 2, 3)
)
## [1] 1
## [1] 2
## [1] 3

# All activity
# env df          ant_community_final_PP_functional_env_grp_noLATLONG
# df              ant_community_final_PP_calc_grp_activity_only
# Important vars  "ice", "NO2_NO3","Chla","Time","bprod","ZML","oxygen"
ant_community_final_PP_calc_grp_activity_only_MoSelNvar <- AICc.table.Nvar(
  c("ice", "NO2_NO3", "Chla", "Time", "bprod", "ZML", "oxygen"),
  matrix.char = vegdist(scale(ant_community_final_PP_calc_grp_activity_only), "euclidean"),
  perm = 999,
  n.var = 3,
  method = "euclidean",
  df = ant_community_final_PP_functional_env_grp_noLATLONG
)

ant_community_final_PP_calc_grp_activity_only_MoSelALL <- AICc.table.all(
  c("ice", "NO2_NO3", "Chla", "Time", "bprod", "ZML", "oxygen"),
  matrix.char = vegdist(scale(ant_community_final_PP_calc_grp_activity_only), "euclidean"),
  perm = 999,
  method = "euclidean",
  df = ant_community_final_PP_functional_env_grp_noLATLONG,
  comb.incl = c(1, 2, 3)
)
## [1] 1
## [1] 2
## [1] 3

# All bacterivory only
# env df          ant_community_final_PP_functional_env_grp_noLATLONG
# df              ant_community_final_PP_calc_grp_Cbased_bacterivory_only
# Important vars  "ice", "NO2_NO3","Chla","Time","bprod","ZML","oxygen", "fluorescence"
ant_community_final_PP_calc_grp_Cbased_bacterivory_only_only_MoSelNvar <- AICc.table.Nvar(
  c("ice", "NO2_NO3", "Chla", "Time", "bprod", "ZML", "oxygen", "fluorescence"),
  matrix.char = vegdist(scale(ant_community_final_PP_calc_grp_Cbased_bacterivory_only), "euclidean"),
  perm = 999,
  n.var = 3,
  method = "euclidean",
  df = ant_community_final_PP_functional_env_grp_noLATLONG
)

ant_community_final_PP_calc_grp_Cbased_bacterivory_only_only_MoSelALL <- AICc.table.all(
  c("ice", "NO2_NO3", "Chla", "Time", "bprod", "ZML", "oxygen", "fluorescence"),
  matrix.char = vegdist(scale(ant_community_final_PP_calc_grp_Cbased_bacterivory_only), "euclidean"),
  perm = 999,
  method = "euclidean",
  df = ant_community_final_PP_functional_env_grp_noLATLONG,
  comb.incl = c(1, 2, 3)
)
## [1] 1
## [1] 2
## [1] 3

# All PP only
# env df          ant_community_final_PP_functional_env_grp_noLATLONG
# df              ant_community_final_PP_calc_grp_Cbased_PP_only
# Important vars  "ice", "NO2_NO3","Chla","Time","bprod","ZML","oxygen", "fluorescence"
# (The two result names below differ in their suffixes, "_only_MoSelNvar" vs
# "_only_only_MoSelALL"; they are kept as-is.)
ant_community_final_PP_calc_grp_Cbased_PP_only_MoSelNvar <- AICc.table.Nvar(
  c("ice", "NO2_NO3", "Chla", "Time", "bprod", "ZML", "oxygen", "fluorescence"),
  matrix.char = vegdist(scale(ant_community_final_PP_calc_grp_Cbased_PP_only), "euclidean"),
  perm = 999,
  n.var = 3,
  method = "euclidean",
  df = ant_community_final_PP_functional_env_grp_noLATLONG
)

ant_community_final_PP_calc_grp_Cbased_PP_only_only_MoSelALL <- AICc.table.all(
  c("ice", "NO2_NO3", "Chla", "Time", "bprod", "ZML", "oxygen", "fluorescence"),
  matrix.char = vegdist(scale(ant_community_final_PP_calc_grp_Cbased_PP_only), "euclidean"),
  perm = 999,
  method = "euclidean",
  df = ant_community_final_PP_functional_env_grp_noLATLONG,
  comb.incl = c(1, 2, 3)
)
## [1] 1
## [1] 2
## [1] 3
# Abundance and activity PCA panels arranged together with one shared legend
# placed on the right, then displayed and written to the manuscript folder.
Abundance_activity_pca_ggarrange <- ggarrange(
  ALL_abundance_PCA_plot_envVars,
  ALL_acivity_PCA_grouped_plot,
  common.legend = TRUE,
  legend = "right"
)

Abundance_activity_pca_ggarrange

ggsave(
  "Abundance_activity_pca_ggarrange.pdf",
  plot = Abundance_activity_pca_ggarrange,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
#ant_community_final_functional_grp_fixed_remST_hetero_MoSelALL <- AICc.table.all(c("PAR", "NH4", "fluorescence", "ZML"), matrix.char = vegdist(scale(ant_community_final_functional_grp_fixed_remST_hetero), "euclidean"), perm = 999, method = "euclidean", df = ant_community_final_calc_env_grp_remST, comb.incl = c(1,2,3))

# PERMANOVA summaries (adonis2, 999 permutations), each written to CSV in the
# working directory. `permutations` is spelled out in full instead of relying
# on partial matching of `perm`.
# NOTE(review): if the left-hand side is already a dissimilarity object,
# adonis2 does not use `method`; it is kept here unchanged.

#Bacterivory only
# NOTE(review): test_dist is computed here but not used anywhere in this
# section; the model below is fitted to bacterivory_dist_matrix, which is
# defined elsewhere. Confirm that is the intended response matrix.
test_dist <- vegdist(scale(ant_community_final_functional_grp_fixed_remST_hetero), method = "euclidean")

Bacterivory_adonis_summary <- adonis2(
  bacterivory_dist_matrix ~ latitude + NH4,
  data = ant_community_final_calc_env_grp_remST,
  permutations = 999,
  method = "euclidean"
)

#write.table(Test_adonis_table, "test_adonis_table")
 
write.csv(Bacterivory_adonis_summary, "Bacterivory_adonis_summary.csv")
#Abundance
# NOTE(review): abundance_dist is likewise computed but not used in this
# section; the model is fitted to functional_dist_matrix (defined elsewhere).
abundance_dist <- vegdist(scale(ant_community_final_functional_grp_fixed_remST), method = "euclidean")

All_abundance_adonis_summary <- adonis2(
  functional_dist_matrix ~ ice + fluorescence,
  data = ant_community_final_calc_env_grp_remST,
  permutations = 999,
  method = "euclidean"
)

write.csv(All_abundance_adonis_summary, "All_abundance_adonis_summary.csv")

#All activity
all_activity_dist <- vegdist(scale(ant_community_final_PP_calc_grp_activity_only), method = "euclidean")

All_activity_adonis_summary <- adonis2(
  all_activity_dist ~ ice,
  data = ant_community_final_PP_functional_env_grp,
  permutations = 999,
  method = "euclidean"
)

write.csv(All_activity_adonis_summary, "All_activity_adonis_summary.csv")
# Subset of the bacterial-biomass table written out as a check table, with the
# per-mL carbon biomass also expressed per litre (x1000).
Jd_check_columns <- c(
  "station", "Rep", "Group", "depth", "MNAN_C_removed", "Mixo_PP",
  "Carbon_biomass_perML", "Pprod_PAR_20", "MNAN_mL", "Bact_mL", "MNAN_SS"
)
Jd_check_table <- mutate(
  ant_community_final_PP_calc_PCA_filter_bacbiomass[, Jd_check_columns],
  Carbon_biomass_perL = Carbon_biomass_perML * 1000
)

write.table(Jd_check_table, "math_check_table")

# Four station-by-depth boxplots that differ only in the y column, followed by
# the individual PDFs and a combined 2x2 arrangement.

# Rows shared by every panel: stations N, O, P and Q, the "Bucket" depth and
# the station G DCM sample are excluded.
percent_bprod_plot_data <- filter(
  ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA,
  !station %in% c("N", "O","P","Q"),
  depth != "Bucket",
  !(station == "G" & depth == "DCM")
)

# Decorations shared by every panel, added in the same order as before.
# ylim() drops values outside 0-350 before the boxplot statistics are computed.
percent_bprod_plot_style <- list(
  ylab(""),
  labs(fill = "Depth"),
  xlab(""),
  scale_fill_manual(values = c("blue3","green4")),
  theme_bw(),
  ylim(0,350)
)

HNAN_percent_bprod_test_plot <- ggplot(percent_bprod_plot_data)+
  geom_boxplot(aes(x = station, y = HNAN_percent_bprod_removed, fill = depth))+
  percent_bprod_plot_style

MNAN_percent_bprod_test_plot <- ggplot(percent_bprod_plot_data)+
  geom_boxplot(aes(x = station, y = MNAN_percent_bprod_removed, fill = depth))+
  percent_bprod_plot_style

HNAN_percent_bprod_test_PERCENTread_plot <- ggplot(percent_bprod_plot_data)+
  geom_boxplot(aes(x = station, y = hetero_act_feeding_C_percent_bprod, fill = depth))+
  percent_bprod_plot_style

MNAN_percent_bprod_test_PERCENTread_plot <- ggplot(percent_bprod_plot_data)+
  geom_boxplot(aes(x = station, y = mixo_act_feeding_C_percent_bprod, fill = depth))+
  percent_bprod_plot_style

#Station G DCM is removed from these plots by the filter above

ggsave(
  "HNAN_percent_bprod_test_plot.pdf",
  plot = HNAN_percent_bprod_test_plot,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
ggsave(
  "MNAN_percent_bprod_test_plot.pdf",
  plot = MNAN_percent_bprod_test_plot,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
ggsave(
  "HNAN_percent_bprod_test_PERCENTread_plot.pdf",
  plot = HNAN_percent_bprod_test_PERCENTread_plot,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
ggsave(
  "MNAN_percent_bprod_test_PERCENTread_plot.pdf",
  plot = MNAN_percent_bprod_test_PERCENTread_plot,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
MNAN_percent_bprod_test_PERCENTread_plot

MNAN_percent_bprod_test_plot

HNAN_percent_bprod_test_PERCENTread_plot

HNAN_percent_bprod_test_plot

# Combined figure: MNAN panels first, HNAN panels second, one legend below.
percent_bprod_ggarrange <- ggarrange(
  MNAN_percent_bprod_test_plot,
  MNAN_percent_bprod_test_PERCENTread_plot,
  HNAN_percent_bprod_test_plot,
  HNAN_percent_bprod_test_PERCENTread_plot,
  common.legend = TRUE,
  legend = "bottom"
)

ggsave(
  "percent_bprod_ggarrange.pdf",
  percent_bprod_ggarrange,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
#Testing to make sure total carbon isn't being consumed by mixotrophs by including total C available in addition to the bacterial production.

#Calculation: bact_ml to bact_L then multiply by average bacterial volume = total biomass
#cont: biomass * conversion factor of either 2.2 or 3.0 x 10^-13 (g/mL) to get *available* g C per liter per day
#cont: multiply available C by SS removal % to get C removed by MNAN and/or HNAN
#I can then normalize to a per cell basis by dividing by the number of cells for both PP and bacterivory for comparison there as well....we shall see what comes of this.

#From the paper given to me by JD, the average volume based on DAPI measurements was 0.045 +- 0.06
#Going to use this volume for my calculation.

#first *1000 is to convert to liters
#second *1000000 is to convert to ugrams
#third /100 is to convert from a whole number percent to decimal percent

#ant_community_final_PP_calc_PCA_filter_bacbiomass <- mutate(ant_community_final_PP_calc_PCA_filter, 
#                                      bac_biomass_avg = 0.045*Bact_mL,
#                                      Carbon_biomass_perML = bac_biomass_avg*(2.2*10^-13),
#                                      MNAN_C_removed = Carbon_biomass_perML*1000*MNAN_SS_perday*1000000/100
# Working copy of the WstA table for the total-carbon check. mutate() evaluates
# its arguments in order, so total_carbon_available is built from the rescaled
# bprod, and the consumed fraction from that total.
# NOTE(review): the meaning of the bprod factors (x24, /1000000, x1500) is not
# stated in this section - confirm the units.
# Percent_total_carbon_consumed is stored as a fraction; it is multiplied by
# 100 only when printed below.
ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_BPROD_test <- mutate(
  ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA,
  bprod = bprod*24/1000000*1500,
  total_carbon_available = (Carbon_biomass_perML*1000*1000000)+bprod,
  Percent_total_carbon_consumed = MNAN_C_removed/total_carbon_available
)

ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_BPROD_test$Percent_total_carbon_consumed*100
##  [1]  3.3177971  0.0000000  0.0000000 16.7814757  0.0000000  3.0733803
##  [7]  3.2840734  0.0000000  3.5144751 10.4229008  3.9861115  0.8151742
## [13]  7.6094242  1.0985670 13.3919560 16.7399450  8.3699725  0.0000000
## [19]  1.7084788  9.4379151  4.8449736  8.4787038  4.8449736 17.3673584
## [25] 25.8155068 16.9020216 14.0677880 18.0370190  4.0540277  8.4790216
## [31]  0.5533269  6.7311143 16.2184631 25.9495410  9.7310779 23.9934545
## [37] 20.8296582 21.3673482 14.1169873  8.1448477 15.7310790 44.1065329
## [43] 13.1209219 20.8129753 20.3800396 20.3558037  6.6720880  7.3927373
## [49] 21.2807523  0.0000000  0.0000000  2.3180078  4.5871630  7.2464255
## [55] 17.3375919 32.8303310 16.3741689  8.8503496  5.6412965 11.5835331
## [61]  9.1016103  8.6965756 13.6459893  7.6593542  2.3413528  2.3685289
## [67]  3.8877442  1.4671937  0.0000000  0.0000000  0.0000000  0.0000000
## [73]  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000
## [79]  1.5255442  2.7933739  0.0000000  0.0000000  0.0000000  0.0000000
## [85]  1.7882260  0.1020599
#Highest % consumed is 44% - still way too high but not inconceivable
#Now that I have confirmed that MNANs don't consume 100% of total available carbon I can make the plots as they should.
#dataset for the correlation matrix
#ant_community_final_calc_grp_remStations

# depth_m plotted against MNAN_mL (points coloured by depth class) with a
# linear smoother, then the same relationship fitted with lm().
ggplot(ant_community_final_calc_grp_remStations, aes(x = MNAN_mL, y = depth_m))+
  geom_point(aes(color = depth))+
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

test_ml_fit <- lm(depth_m ~ MNAN_mL, data = ant_community_final_calc_grp_remStations)

summary(test_ml_fit)
## 
## Call:
## lm(formula = depth_m ~ MNAN_mL, data = ant_community_final_calc_grp_remStations)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -21.9014 -12.7092  -0.0969   9.6876  28.6719 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -0.21866    7.05015  -0.031   0.9756  
## MNAN_mL      0.25701    0.08949   2.872   0.0111 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.73 on 16 degrees of freedom
## Multiple R-squared:  0.3402, Adjusted R-squared:  0.2989 
## F-statistic: 8.249 on 1 and 16 DF,  p-value: 0.01106
# Intercept and slope of test_ml_fit, used to draw the fitted line by hand.
test_coef <- coef(test_ml_fit)

ggplot(ant_community_final_calc_grp_remStations, aes(x = MNAN_mL, y = depth_m))+
  geom_point(aes(color = depth))+
  geom_abline(
    intercept = test_coef[["(Intercept)"]],
    slope = test_coef[["MNAN_mL"]]
  )

# Bacterial abundance and bacterial production against latitude.
ggplot(ant_community_final_calc_grp_remStations, aes(x = latitude, y = Bact_mL))+
  geom_point(aes(color = depth))

ggplot(ant_community_final_calc_grp_remStations, aes(x = latitude, y = bprod))+
  geom_point(aes(color = depth))

# Bacterial concentration (Bact_mL) against latitude, coloured by depth, with
# stations N, O, P, Q and the "Bucket" depth excluded.
# The legend title is set on the colour scale: depth is mapped to `color`
# here, so labs(fill = ...) had no effect on this plot.
bacterial_conc_lat_plot <- ggplot(filter(ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA, !station %in% c("N", "O","P","Q"), depth != "Bucket"))+
   geom_point(aes(x = latitude, y = Bact_mL, color = depth))+
   ylab("")+
   labs(color = "Depth")+
   xlab("")+
   scale_color_manual(values = c("blue3","green4"))+
   theme_bw()

ggsave("bacterial_conc_lat_plot.pdf", plot = bacterial_conc_lat_plot, path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/")
## Saving 7 x 5 in image
#No matter how I visualize it, there is no latitudinal signal in bacterial abundance. That makes it difficult to explain the estimated grazing rates compared to bacterial production.
#I need to scale independently and rerun the analysis
#Steps to take
#1) subset all into a single df
#2) scale dataframe with all at once
#3) split scaled df into sections for PCA
#4) Make independent PCAs
#5) Then envfit on each PCA
#6) compare old to new

#Grouped df with ALL calcs: ant_community_final_PP_calc_grp
#Ungrouped df: ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA
#for all abundances: 
#response_vars_all_abundance <- c("MNAN_mL", "PNAN_mL","ANAN_mL","HNAN_mL")
# Activity response columns to ordinate together.
response_vars_all_activity <- c(
  "MNAN_C_removed",
  "HNAN_C_removed",
  "Mixo_PP_potential",
  "Phyto_PP"
)

# Keep only those columns from the grouped table. Columns come out in the
# table's own order, not the order of the vector above.
PCA_debug_keep_cols <- which(
  colnames(ant_community_final_PP_calc_grp) %in% response_vars_all_activity
)
PCA_debug_df <- ant_community_final_PP_calc_grp[, PCA_debug_keep_cols]

# Centre and scale every column using all rows at once.
PCA_debug_df_scale <- scale(PCA_debug_df)

PCA_debug_df_scale
##          Phyto_PP Mixo_PP_potential MNAN_C_removed HNAN_C_removed
##  [1,]          NA                NA     -1.3417119     -1.0185220
##  [2,]          NA                NA     -0.3227359      0.3147138
##  [3,] -0.34526191       -0.45135613     -1.1853389     -0.8567312
##  [4,] -0.82476694       -1.07767514     -0.7037505     -0.9487441
##  [5,] -0.50758406       -0.64872310     -0.2852134     -0.9584324
##  [6,] -0.70925672       -0.86738689     -1.0877916     -0.8943591
##  [7,] -0.79606955       -0.84001411     -0.9279771     -1.3899423
##  [8,] -0.88866393       -1.09028576      0.3468078     -0.4088957
##  [9,] -0.88960745        0.05964745      1.0979060      0.3734440
## [10,] -0.11580232        2.19001929     -0.4718947     -0.5534266
## [11,]  2.97195024        2.01287237      0.1707369     -0.6177707
## [12,]  1.43325203        0.71742965      0.5823859      0.2245008
## [13,] -0.07201981       -0.18919801     -0.0125233      0.5609460
## [14,]  0.26826286       -0.68675075      1.8917326      0.7711692
## [15,]  0.08959907        0.21779738      1.4998148      0.8597404
## [16,]  0.48245939        0.79706750      0.4133083      2.3334450
## [17,] -0.08218213       -0.05197490     -1.0763230      1.1273651
## [18,] -0.01430877       -0.09146884      1.4125681      1.0814998
## attr(,"scaled:center")
##          Phyto_PP Mixo_PP_potential    MNAN_C_removed    HNAN_C_removed 
##        0.38930049        1.11326201        0.18250410        0.07356999 
## attr(,"scaled:scale")
##          Phyto_PP Mixo_PP_potential    MNAN_C_removed    HNAN_C_removed 
##        0.39085930        0.87428450        0.12368377        0.03668775
# Re-scale using rows 3:18 only, so the centring and scaling values are
# computed without the first two rows (station A, going by the object name and
# the notes further down).
PCA_debug_df_scale_noStA <- scale(PCA_debug_df[3:18,])

PCA_debug_df_scale_noStA
##          Phyto_PP Mixo_PP_potential MNAN_C_removed HNAN_C_removed
##  [1,] -0.34526191       -0.45135613    -1.29271064     -0.8767158
##  [2,] -0.82476694       -1.07767514    -0.80987329     -0.9662766
##  [3,] -0.50758406       -0.64872310    -0.39025079     -0.9757067
##  [4,] -0.70925672       -0.86738689    -1.19491037     -0.9133409
##  [5,] -0.79606955       -0.84001411    -1.03468145     -1.3957172
##  [6,] -0.88866393       -1.09028576     0.24340941     -0.4408147
##  [7,] -0.88960745        0.05964745     0.99645542      0.3206763
##  [8,] -0.11580232        2.19001929    -0.57741625     -0.5814940
##  [9,]  2.97195024        2.01287237     0.06688194     -0.6441234
## [10,]  1.43325203        0.71742965     0.47959845      0.1757023
## [11,] -0.07201981       -0.18919801    -0.11685354      0.5031815
## [12,]  0.26826286       -0.68675075     1.79234069      0.7078024
## [13,]  0.08959907        0.21779738     1.39940656      0.7940133
## [14,]  0.48245939        0.79706750     0.31008234      2.2284448
## [15,] -0.08218213       -0.05197490    -1.18341202      1.0545059
## [16,] -0.01430877       -0.09146884     1.31193355      1.0098629
## attr(,"scaled:center")
##          Phyto_PP Mixo_PP_potential    MNAN_C_removed    HNAN_C_removed 
##        0.38930049        1.11326201        0.19537068        0.07518381 
## attr(,"scaled:scale")
##          Phyto_PP Mixo_PP_potential    MNAN_C_removed    HNAN_C_removed 
##        0.39085930        0.87428450        0.12336385        0.03769222
# Label the rows of both scaled matrices; the no-station-A matrix takes the
# labels for rows 3:18 to match the rows it was built from.
rownames(PCA_debug_df_scale) <- grouped_rownames_remST
rownames(PCA_debug_df_scale_noStA) <- grouped_rownames_remST[3:18]

#Some initial impression notes: May need to REMOVE station A prior to analysis as this may be causing the shift? unlikely but worth looking into the previous code to see if I remove it before doing the PCAs before

#**NOT A PROBLEM station A was removed from the PCAs**

#now to split into 3 dfs for PCAs - ill do this in one call
#Forgot to remove station A before scale...may need to go back and check to see if it impacts scaling - **it does - changed and fixed** 

# Three PCAs on the same jointly scaled matrix: every response column, the two
# bacterivory columns, and the two primary-production columns.
PCA_debud_bacterivory_cols <- c("MNAN_C_removed", "HNAN_C_removed")
PCA_debud_PP_cols <- c("Mixo_PP_potential", "Phyto_PP")

PCA_debud_prcomp_all_activity <- prcomp(PCA_debug_df_scale_noStA)

PCA_debud_prcomp_bacterivory <- prcomp(
  PCA_debug_df_scale_noStA[, PCA_debud_bacterivory_cols]
)

PCA_debud_prcomp_PP <- prcomp(
  PCA_debug_df_scale_noStA[, PCA_debud_PP_cols]
)

#A version that has the percell activity measures


#Add envfits
# Each debug PCA is fitted against every column of an environmental table
# using 999 permutations. No seed is set in this section, so permutation
# p-values can shift slightly between runs; the "sig vars" notes below were
# recorded by hand from one run.
PCA_debud_prcomp_all_activity_envfit <- envfit(
  PCA_debud_prcomp_all_activity ~ .,
  ant_community_final_PP_functional_env_grp_noLATLONG,
  permutations = 999
)
#sig vars col#: 2,5,6,9,11,13,16,18
#sig vars name: ice, C
#With LAT/Long
PCA_debud_prcomp_all_activity_envfit_lat_long <- envfit(
  PCA_debud_prcomp_all_activity ~ .,
  ant_community_final_PP_functional_env_grp,
  permutations = 999
)
#sig vars col#: 2,5,6,9:11,13:15,18,20
#sig vars name: ice, C
PCA_debud_prcomp_bacterivory_envfit <- envfit(
  PCA_debud_prcomp_bacterivory ~ .,
  ant_community_final_PP_functional_env_grp_noLATLONG,
  permutations = 999
)
#sig vars col#: 2,5,6,13,16,18
#sig vars name:
PCA_debud_prcomp_PP_envfit <- envfit(
  PCA_debud_prcomp_PP ~ .,
  ant_community_final_PP_functional_env_grp_noLATLONG,
  permutations = 999
)
#sig vars col#: 2,3,6,9:11,14,18
#sig vars name: ice, airT, ZML, oxygen, beamTrans, salinity, NO2_NO3
#extract scores for each and make plots - do this on a model per section basis
#I really should've invested the time to make a function for each of these plotting calls. Gotta make my code more readable and may need to do this for code/data submission.
#I won't be making this mistake again...

#all activity debug -----------
# Plot data for the all-activity PCA: site scores plus depth/Group metadata,
# response loadings (x1.5 for display) and envfit vectors (x2 for display).
PCA_debud_prcomp_all_activity_scores <- as.data.frame(scores(PCA_debud_prcomp_all_activity))
PCA_debud_prcomp_all_activity_species <- as.data.frame(scores(PCA_debud_prcomp_all_activity, display = "species"))*1.5
# Metadata is attached by name so the aes(color = Group, shape = depth) lookup
# below does not depend on how positional assignment names new columns.
# Rows 3:18 are the rows that went into PCA_debug_df_scale_noStA.
PCA_debud_prcomp_all_activity_scores$depth <- ant_community_final_PP_calc_grp[["depth"]][3:18]
PCA_debud_prcomp_all_activity_scores$Group <- ant_community_final_PP_calc_grp[["Group"]][3:18]

PCA_debud_prcomp_all_activity_envfit_scores <- as.data.frame(scores(PCA_debud_prcomp_all_activity_envfit, "vectors"))*2

# envfit rows drawn as arrows (hand-recorded significant variables).
PCA_debud_all_activity_sig_rows <- c(2,5,6,9,11,13,16,18)
PCA_debud_all_activity_arrows <- data.frame(
  variable = rownames(PCA_debud_prcomp_all_activity_envfit_scores)[PCA_debud_all_activity_sig_rows],
  PC1 = PCA_debud_prcomp_all_activity_envfit_scores[PCA_debud_all_activity_sig_rows, 1],
  PC2 = PCA_debud_prcomp_all_activity_envfit_scores[PCA_debud_all_activity_sig_rows, 2]
)

# Percent variance per axis is computed from this PCA instead of being typed
# in, so the axis labels cannot go stale when the input changes.
PCA_debud_all_activity_pct <- round(
  100 * PCA_debud_prcomp_all_activity$sdev^2 / sum(PCA_debud_prcomp_all_activity$sdev^2)
)

PCA_debud_ALL_acivity_PCA_grouped_plot <- ggplot(data = PCA_debud_prcomp_all_activity_scores, aes(x = PC1, y = PC2))+
  geom_point(aes(color = Group, shape = depth), size = 2)+
  geom_segment(data = PCA_debud_all_activity_arrows, aes(x = 0, y = 0, xend = PC1, yend = PC2), arrow = arrow(length = unit(0.1, "inches")))+
  geom_text_repel(data = PCA_debud_all_activity_arrows, aes(x = PC1, y = PC2-0.04), fontface = "bold", label = PCA_debud_all_activity_arrows$variable)+
  geom_text(aes(x = PC1, y = PC2-0.2), size = 3, label = rownames(PCA_debud_prcomp_all_activity_scores), color = "grey3")+
  xlab(paste0("PC1 (", PCA_debud_all_activity_pct[1], "%)"))+
  ylab(paste0("PC2 (", PCA_debud_all_activity_pct[2], "%)"))+
  theme_bw()+theme(panel.grid = element_blank())+
  geom_segment(data = PCA_debud_prcomp_all_activity_species, aes(x = 0, y = 0, xend = PC1, yend = PC2), color = "red")+
  geom_text(data = PCA_debud_prcomp_all_activity_species, aes(x = PC1, y = PC2), color = "red", label = rownames(PCA_debud_prcomp_all_activity_species))

PCA_debud_ALL_acivity_PCA_grouped_plot

#With lat and long
# Same all-activity PCA, but the arrows come from the envfit that was run on
# the environmental table that still includes latitude/longitude.
PCA_debud_prcomp_all_activity_scores_Wlatlong <- as.data.frame(scores(PCA_debud_prcomp_all_activity))
PCA_debud_prcomp_all_activity_species_Wlatlong <- as.data.frame(scores(PCA_debud_prcomp_all_activity, display = "species"))*1.5
# Metadata is attached by name so the aes(color = Group, shape = depth) lookup
# below does not depend on how positional assignment names new columns.
PCA_debud_prcomp_all_activity_scores_Wlatlong$depth <- ant_community_final_PP_calc_grp[["depth"]][3:18]
PCA_debud_prcomp_all_activity_scores_Wlatlong$Group <- ant_community_final_PP_calc_grp[["Group"]][3:18]

PCA_debud_prcomp_all_activity_envfit_scores_Wlatlong <- as.data.frame(scores(PCA_debud_prcomp_all_activity_envfit_lat_long, "vectors"))*2

# envfit rows drawn as arrows (hand-recorded significant variables).
PCA_debud_all_activity_Wlatlong_sig_rows <- c(2,5,6,9:11,13:15,18,20)
PCA_debud_all_activity_Wlatlong_arrows <- data.frame(
  variable = rownames(PCA_debud_prcomp_all_activity_envfit_scores_Wlatlong)[PCA_debud_all_activity_Wlatlong_sig_rows],
  PC1 = PCA_debud_prcomp_all_activity_envfit_scores_Wlatlong[PCA_debud_all_activity_Wlatlong_sig_rows, 1],
  PC2 = PCA_debud_prcomp_all_activity_envfit_scores_Wlatlong[PCA_debud_all_activity_Wlatlong_sig_rows, 2]
)

# Percent variance per axis is computed from the PCA instead of being typed in.
PCA_debud_all_activity_Wlatlong_pct <- round(
  100 * PCA_debud_prcomp_all_activity$sdev^2 / sum(PCA_debud_prcomp_all_activity$sdev^2)
)

PCA_debud_ALL_acivity_PCA_grouped_plot_Wlatlong <- ggplot(data = PCA_debud_prcomp_all_activity_scores_Wlatlong, aes(x = PC1, y = PC2))+
  geom_point(aes(color = Group, shape = depth), size = 2)+
  geom_segment(data = PCA_debud_all_activity_Wlatlong_arrows, aes(x = 0, y = 0, xend = PC1, yend = PC2), arrow = arrow(length = unit(0.1, "inches")))+
  geom_text_repel(data = PCA_debud_all_activity_Wlatlong_arrows, aes(x = PC1, y = PC2-0.04), fontface = "bold", label = PCA_debud_all_activity_Wlatlong_arrows$variable)+
  geom_text(aes(x = PC1, y = PC2-0.2), size = 3, label = rownames(PCA_debud_prcomp_all_activity_scores_Wlatlong), color = "grey3")+
  xlab(paste0("PC1 (", PCA_debud_all_activity_Wlatlong_pct[1], "%)"))+
  ylab(paste0("PC2 (", PCA_debud_all_activity_Wlatlong_pct[2], "%)"))+
  theme_bw()+theme(panel.grid = element_blank())+
  geom_segment(data = PCA_debud_prcomp_all_activity_species_Wlatlong, aes(x = 0, y = 0, xend = PC1, yend = PC2), color = "red")+
  geom_text(data = PCA_debud_prcomp_all_activity_species_Wlatlong, aes(x = PC1, y = PC2), color = "red", label = rownames(PCA_debud_prcomp_all_activity_species_Wlatlong))

PCA_debud_ALL_acivity_PCA_grouped_plot_Wlatlong

ggsave("PCA_debud_ALL_acivity_PCA_grouped_plot_Wlatlong.pdf", plot = PCA_debud_ALL_acivity_PCA_grouped_plot_Wlatlong, path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/")
## Saving 7 x 5 in image
#PP only debug ---------
# Plot data for the primary-production PCA (two response columns): site scores
# plus depth/Group metadata, loadings (x1.5) and envfit vectors (x2).
PCA_debud_prcomp_PP_scores <- as.data.frame(scores(PCA_debud_prcomp_PP))
PCA_debud_prcomp_PP_species <- as.data.frame(scores(PCA_debud_prcomp_PP, display = "species"))*1.5
# Metadata is attached by name so the aes(color = Group, shape = depth) lookup
# below does not depend on how positional assignment names new columns.
PCA_debud_prcomp_PP_scores$depth <- ant_community_final_PP_calc_grp[["depth"]][3:18]
PCA_debud_prcomp_PP_scores$Group <- ant_community_final_PP_calc_grp[["Group"]][3:18]

PCA_debud_prcomp_PP_envfit_scores <- as.data.frame(scores(PCA_debud_prcomp_PP_envfit, "vectors"))*2

# envfit rows drawn as arrows (hand-recorded significant variables).
PCA_debud_PP_sig_rows <- c(2,3,6,9:11,14,18)
PCA_debud_PP_arrows <- data.frame(
  variable = rownames(PCA_debud_prcomp_PP_envfit_scores)[PCA_debud_PP_sig_rows],
  PC1 = PCA_debud_prcomp_PP_envfit_scores[PCA_debud_PP_sig_rows, 1],
  PC2 = PCA_debud_prcomp_PP_envfit_scores[PCA_debud_PP_sig_rows, 2]
)

# Percent variance per axis, computed from this PCA. The previous fixed labels
# (51% / 31%) cannot be right for a two-variable PCA, whose two axes together
# account for all of the variance.
PCA_debud_PP_pct <- round(
  100 * PCA_debud_prcomp_PP$sdev^2 / sum(PCA_debud_prcomp_PP$sdev^2)
)

PCA_debud_PP_PCA_grouped_plot <- ggplot(data = PCA_debud_prcomp_PP_scores, aes(x = PC1, y = PC2))+
  geom_point(aes(color = Group, shape = depth), size = 2)+
  geom_segment(data = PCA_debud_PP_arrows, aes(x = 0, y = 0, xend = PC1, yend = PC2), arrow = arrow(length = unit(0.1, "inches")))+
  geom_text_repel(data = PCA_debud_PP_arrows, aes(x = PC1, y = PC2-0.04), fontface = "bold", label = PCA_debud_PP_arrows$variable)+
  geom_text(aes(x = PC1, y = PC2-0.2), size = 3, label = rownames(PCA_debud_prcomp_PP_scores), color = "grey3")+
  xlab(paste0("PC1 (", PCA_debud_PP_pct[1], "%)"))+
  ylab(paste0("PC2 (", PCA_debud_PP_pct[2], "%)"))+
  theme_bw()+theme(panel.grid = element_blank())+
  geom_segment(data = PCA_debud_prcomp_PP_species, aes(x = 0, y = 0, xend = PC1, yend = PC2), color = "red")+
  geom_text(data = PCA_debud_prcomp_PP_species, aes(x = PC1, y = PC2), color = "red", label = rownames(PCA_debud_prcomp_PP_species))

PCA_debud_PP_PCA_grouped_plot

#Bacterivory only debug --------
# Plot data for the bacterivory PCA (two response columns): site scores plus
# depth/Group metadata, loadings (x1.5) and envfit vectors (x2).
PCA_debud_prcomp_bacterivory_scores <- as.data.frame(scores(PCA_debud_prcomp_bacterivory))
PCA_debud_prcomp_bacterivory_species <- as.data.frame(scores(PCA_debud_prcomp_bacterivory, display = "species"))*1.5
# Metadata is attached by name so the aes(color = Group, shape = depth) lookup
# below does not depend on how positional assignment names new columns.
PCA_debud_prcomp_bacterivory_scores$depth <- ant_community_final_PP_calc_grp[["depth"]][3:18]
PCA_debud_prcomp_bacterivory_scores$Group <- ant_community_final_PP_calc_grp[["Group"]][3:18]

PCA_debud_prcomp_bacterivory_envfit_scores <- as.data.frame(scores(PCA_debud_prcomp_bacterivory_envfit, "vectors"))*2

# envfit rows drawn as arrows (hand-recorded significant variables).
PCA_debud_bacterivory_sig_rows <- c(2,5,6,13,16,18)
PCA_debud_bacterivory_arrows <- data.frame(
  variable = rownames(PCA_debud_prcomp_bacterivory_envfit_scores)[PCA_debud_bacterivory_sig_rows],
  PC1 = PCA_debud_prcomp_bacterivory_envfit_scores[PCA_debud_bacterivory_sig_rows, 1],
  PC2 = PCA_debud_prcomp_bacterivory_envfit_scores[PCA_debud_bacterivory_sig_rows, 2]
)

# Percent variance per axis, computed from this PCA. The previous fixed labels
# (51% / 31%) cannot be right for a two-variable PCA, whose two axes together
# account for all of the variance.
PCA_debud_bacterivory_pct <- round(
  100 * PCA_debud_prcomp_bacterivory$sdev^2 / sum(PCA_debud_prcomp_bacterivory$sdev^2)
)

PCA_debud_bacterivory_PCA_grouped_plot <- ggplot(data = PCA_debud_prcomp_bacterivory_scores, aes(x = PC1, y = PC2))+
  geom_point(aes(color = Group, shape = depth), size = 2)+
  geom_segment(data = PCA_debud_bacterivory_arrows, aes(x = 0, y = 0, xend = PC1, yend = PC2), arrow = arrow(length = unit(0.1, "inches")))+
  geom_text_repel(data = PCA_debud_bacterivory_arrows, aes(x = PC1, y = PC2-0.04), fontface = "bold", label = PCA_debud_bacterivory_arrows$variable)+
  geom_text(aes(x = PC1, y = PC2-0.2), size = 3, label = rownames(PCA_debud_prcomp_bacterivory_scores), color = "grey3")+
  geom_segment(data = PCA_debud_prcomp_bacterivory_species, aes(x = 0, y = 0, xend = PC1, yend = PC2), color = "red")+
  geom_text(data = PCA_debud_prcomp_bacterivory_species, aes(x = PC1, y = PC2), color = "red", label = rownames(PCA_debud_prcomp_bacterivory_species))+
  xlab(paste0("PC1 (", PCA_debud_bacterivory_pct[1], "%)"))+
  ylab(paste0("PC2 (", PCA_debud_bacterivory_pct[2], "%)"))+
  theme_bw()+theme(panel.grid = element_blank())

PCA_debud_bacterivory_PCA_grouped_plot

PCA_debud_PP_PCA_grouped_plot

PCA_debud_ALL_acivity_PCA_grouped_plot

#The Debug worked and made it so that there's no contradiction between the variables significant between the 2 portions of the activity responses. 

#I want to quickly check the model by using the 2 mixo activity metrics together
#Also I spelled debug wrong in the object names...I'm gonna leave it that way for now
#Mixo_only_debug ---------
#Repeat all steps but with Mixo potential PP and Mixo feeding C and see what vars are important and see if they align with what's found in the other models
PCA_debud_prcomp_mixo_only_act <- prcomp(PCA_debug_df_scale_noStA[, c("MNAN_C_removed", "Mixo_PP_potential")])

# envfit against the table without lat/long, then against the table with it.
# Only the second fit is plotted below.
PCA_debud_prcomp_mixo_only_act_envfit_noLATLONG <- envfit(PCA_debud_prcomp_mixo_only_act ~ ., ant_community_final_PP_functional_env_grp_noLATLONG, perm = 999)
#Important vars row#: 2,6,9,13,18
#Important vars name:

PCA_debud_prcomp_mixo_only_act_envfit <- envfit(PCA_debud_prcomp_mixo_only_act ~ ., ant_community_final_PP_functional_env_grp, perm = 999)
#Important vars row#: 2,6,9,11,13,15,20
PCA_debud_prcomp_mixo_only_act_scores <- as.data.frame(scores(PCA_debud_prcomp_mixo_only_act))
PCA_debud_prcomp_mixo_only_act_species <- as.data.frame(scores(PCA_debud_prcomp_mixo_only_act, display = "species"))*1.5
# Metadata is attached by name so the aes(color = Group, shape = depth) lookup
# below does not depend on how positional assignment names new columns.
PCA_debud_prcomp_mixo_only_act_scores$depth <- ant_community_final_PP_calc_grp[["depth"]][3:18]
PCA_debud_prcomp_mixo_only_act_scores$Group <- ant_community_final_PP_calc_grp[["Group"]][3:18]

PCA_debud_prcomp_mixo_only_act_envfit_scores <- as.data.frame(scores(PCA_debud_prcomp_mixo_only_act_envfit, "vectors"))*2

# envfit rows drawn as arrows (hand-recorded important variables).
PCA_debud_mixo_only_sig_rows <- c(2,6,9,11,13,15,20)
PCA_debud_mixo_only_arrows <- data.frame(
  variable = rownames(PCA_debud_prcomp_mixo_only_act_envfit_scores)[PCA_debud_mixo_only_sig_rows],
  PC1 = PCA_debud_prcomp_mixo_only_act_envfit_scores[PCA_debud_mixo_only_sig_rows, 1],
  PC2 = PCA_debud_prcomp_mixo_only_act_envfit_scores[PCA_debud_mixo_only_sig_rows, 2]
)

# Percent variance per axis, computed from this PCA. The previous fixed labels
# (51% / 31%) cannot be right for a two-variable PCA, whose two axes together
# account for all of the variance.
PCA_debud_mixo_only_pct <- round(
  100 * PCA_debud_prcomp_mixo_only_act$sdev^2 / sum(PCA_debud_prcomp_mixo_only_act$sdev^2)
)

PCA_debud_mixo_only_PCA_grouped_plot <- ggplot(data = PCA_debud_prcomp_mixo_only_act_scores, aes(x = PC1, y = PC2))+
  geom_point(aes(color = Group, shape = depth), size = 2)+
  geom_segment(data = PCA_debud_mixo_only_arrows, aes(x = 0, y = 0, xend = PC1, yend = PC2), arrow = arrow(length = unit(0.1, "inches")))+
  geom_text_repel(data = PCA_debud_mixo_only_arrows, aes(x = PC1, y = PC2-0.04), fontface = "bold", label = PCA_debud_mixo_only_arrows$variable)+
  geom_text(aes(x = PC1, y = PC2-0.2), size = 3, label = rownames(PCA_debud_prcomp_mixo_only_act_scores), color = "grey3")+
  xlab(paste0("PC1 (", PCA_debud_mixo_only_pct[1], "%)"))+
  ylab(paste0("PC2 (", PCA_debud_mixo_only_pct[2], "%)"))+
  theme_bw()+theme(panel.grid = element_blank())+
  geom_segment(data = PCA_debud_prcomp_mixo_only_act_species, aes(x = 0, y = 0, xend = PC1, yend = PC2), color = "red")+
  geom_text(data = PCA_debud_prcomp_mixo_only_act_species, aes(x = PC1, y = PC2), color = "red", label = rownames(PCA_debud_prcomp_mixo_only_act_species))

PCA_debud_mixo_only_PCA_grouped_plot

#This raises my last and FINAL question I will be dealing with on this paper...Can I actually use this method to see 2 different trends
#Debug test null signal with latitudinal signal---------
#I will throw 1 with a definite trend and 1 null and see If it picks up a signal.

#Step 1: make new scale dataset
# Seed the RNG so the null variable (and the permutation test below) give the
# same result on every run; without it this check is not reproducible.
# NOTE(review): the "Important vars" rows recorded below came from an unseeded
# run - re-check them against the seeded output.
set.seed(1910)
Null_ds <- rnorm(16)
# Evenly spaced gradient, 10 to 85 in steps of 5 (16 values).
Lat_grad_ds <- seq(10, 85, by = 5)

fake_df <- cbind(Null_ds, Lat_grad_ds)

fake_df_scale <- scale(fake_df)

rownames(fake_df_scale) <- grouped_rownames_remST[3:18]

#Step 2: make prcomp/envfit/plot
PCA_debud_prcomp_fake_actvity <- prcomp(fake_df_scale)

PCA_debud_prcomp_fake_actvity_envfit <- envfit(PCA_debud_prcomp_fake_actvity ~ ., ant_community_final_PP_functional_env_grp_noLATLONG, perm = 999)
#Important vars row#: 2,6,9,13,16,18
#Important vars name: fill in later if needed


PCA_debud_prcomp_fake_actvity_scores <- as.data.frame(scores(PCA_debud_prcomp_fake_actvity))
PCA_debud_prcomp_fake_actvity_species <- as.data.frame(scores(PCA_debud_prcomp_fake_actvity, display = "species"))*1.5
# Metadata is attached by name so the aes(color = Group, shape = depth) lookup
# below does not depend on how positional assignment names new columns.
PCA_debud_prcomp_fake_actvity_scores$depth <- ant_community_final_PP_calc_grp[["depth"]][3:18]
PCA_debud_prcomp_fake_actvity_scores$Group <- ant_community_final_PP_calc_grp[["Group"]][3:18]

PCA_debud_prcomp_fake_actvity_envfit_scores <- as.data.frame(scores(PCA_debud_prcomp_fake_actvity_envfit, "vectors"))*2

# envfit rows drawn as arrows (hand-recorded important variables).
PCA_debud_fake_sig_rows <- c(2,6,9,13,16,18)
PCA_debud_fake_arrows <- data.frame(
  variable = rownames(PCA_debud_prcomp_fake_actvity_envfit_scores)[PCA_debud_fake_sig_rows],
  PC1 = PCA_debud_prcomp_fake_actvity_envfit_scores[PCA_debud_fake_sig_rows, 1],
  PC2 = PCA_debud_prcomp_fake_actvity_envfit_scores[PCA_debud_fake_sig_rows, 2]
)

# Percent variance per axis, computed from this PCA. The previous fixed labels
# (51% / 31%) cannot be right for a two-variable PCA, whose two axes together
# account for all of the variance.
PCA_debud_fake_pct <- round(
  100 * PCA_debud_prcomp_fake_actvity$sdev^2 / sum(PCA_debud_prcomp_fake_actvity$sdev^2)
)

PCA_debud_fake_PCA_grouped_plot <- ggplot(data = PCA_debud_prcomp_fake_actvity_scores, aes(x = PC1, y = PC2))+
  geom_point(aes(color = Group, shape = depth), size = 2)+
  geom_segment(data = PCA_debud_fake_arrows, aes(x = 0, y = 0, xend = PC1, yend = PC2), arrow = arrow(length = unit(0.1, "inches")))+
  geom_text_repel(data = PCA_debud_fake_arrows, aes(x = PC1, y = PC2-0.04), fontface = "bold", label = PCA_debud_fake_arrows$variable)+
  geom_text(aes(x = PC1, y = PC2-0.2), size = 3, label = rownames(PCA_debud_prcomp_fake_actvity_scores), color = "grey3")+
  xlab(paste0("PC1 (", PCA_debud_fake_pct[1], "%)"))+
  ylab(paste0("PC2 (", PCA_debud_fake_pct[2], "%)"))+
  theme_bw()+theme(panel.grid = element_blank())+
  geom_segment(data = PCA_debud_prcomp_fake_actvity_species, aes(x = 0, y = 0, xend = PC1, yend = PC2), color = "red")+
  geom_text(data = PCA_debud_prcomp_fake_actvity_species, aes(x = PC1, y = PC2), color = "red", label = rownames(PCA_debud_prcomp_fake_actvity_species))

PCA_debud_fake_PCA_grouped_plot

#all plots objects here
# Render the five grouped PCA figures one after another for visual comparison.
print(PCA_debud_fake_PCA_grouped_plot)

print(PCA_debud_mixo_only_PCA_grouped_plot)

print(PCA_debud_bacterivory_PCA_grouped_plot)

print(PCA_debud_PP_PCA_grouped_plot)

print(PCA_debud_ALL_acivity_PCA_grouped_plot)

#Discovered the model is fine to find and extract relationships...I think. Check JDs opinion
#Now with these fixed models I need to do the model selection...Then I can write my paper/fix the final figures.

#Saving the debugged figures here------
# Every figure is written as "<plot object name>.pdf" into the manuscript
# figure folder, in this order: null model, all activity, bacterivorous
# activity, PP activity, mixo only activity.
for (debug_plot_name in c(
  "PCA_debud_fake_PCA_grouped_plot",
  "PCA_debud_ALL_acivity_PCA_grouped_plot",
  "PCA_debud_bacterivory_PCA_grouped_plot",
  "PCA_debud_PP_PCA_grouped_plot",
  "PCA_debud_mixo_only_PCA_grouped_plot"
)) {
  ggsave(
    paste0(debug_plot_name, ".pdf"),
    plot = get(debug_plot_name),
    path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
  )
}
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
#Making the new model selection and adonis models---------
#Already scaled dataframe so I'm removing that step from the Mod select call

#all activity
# Model selection on the Euclidean distance matrix of all scaled activity
# variables, using the candidate predictors listed in the first argument.
AICc.table.all(
  c("ice", "Chla", "ZML", "oxygen", "bprod", "Time", "beamTrans", "NO2_NO3"),
  matrix.char = vegdist(PCA_debug_df_scale_noStA, "euclidean"),
  perm = 999,
  method = "euclidean",
  df = ant_community_final_PP_functional_env_grp_noLATLONG,
  comb.incl = c(1, 2, 3)
)
## [1] 1
## [1] 2
## [1] 3
## # A tibble: 400 x 8
##    variables  AICc.values `Pseudo-_F_` `p-value` `Var Explnd` Model `Delta AICc`
##    <chr>            <dbl>        <dbl>     <dbl>        <dbl> <chr>        <dbl>
##  1 " ice"            17.9         9.34     0.001        0.400 model        0.735
##  2 " Chla"           20.2         6.23     0.002        0.308 model        3.03 
##  3 " ZML"            19.3         7.36     0.001        0.344 model        2.16 
##  4 " oxygen"         19.1         7.64     0.001        0.353 model        1.95 
##  5 " bprod"          21.3         4.90     0.01         0.259 model        4.11 
##  6 " Time"           19.0         7.79     0.001        0.358 model        1.84 
##  7 " beamTra…        19.7         6.84     0.002        0.328 model        2.55 
##  8 " NO2_NO3"        18.2         8.85     0.001        0.387 model        1.08 
##  9 " ice + C…        19.6         9.43     0.001        0.726 model        2.47 
## 10 " Chla + …        19.6         7.26     0.002        0.558 model        2.47 
## # … with 390 more rows, and 1 more variable: Relative Likelihood <dbl>
#Best model: Ice, ZML, Time

#Bacterivorous activity
# Model selection restricted to the two carbon-removal columns
# (MNAN_C_removed, HNAN_C_removed) of the scaled activity matrix.
AICc.table.all(
  c("ice", "Chla", "ZML", "bprod", "Time", "NO2_NO3"),
  matrix.char = vegdist(
    PCA_debug_df_scale_noStA[, c("MNAN_C_removed", "HNAN_C_removed")],
    "euclidean"
  ),
  perm = 999,
  method = "euclidean",
  df = ant_community_final_PP_functional_env_grp_noLATLONG,
  comb.incl = c(1, 2, 3)
)
## [1] 1
## [1] 2
## [1] 3
## # A tibble: 156 x 8
##    variables  AICc.values `Pseudo-_F_` `p-value` `Var Explnd` Model `Delta AICc`
##    <chr>            <dbl>        <dbl>     <dbl>        <dbl> <chr>        <dbl>
##  1 " ice"            5.63        11.1      0.002        0.443 model         2.87
##  2 " Chla"           7.76         7.98     0.005        0.363 model         5.00
##  3 " ZML"            7.64         8.15     0.002        0.368 model         4.88
##  4 " bprod"          8.38         7.15     0.009        0.338 model         5.62
##  5 " Time"           2.76        16.1      0.001        0.534 model         0   
##  6 " NO2_NO3"        4.40        13.1      0.002        0.484 model         1.64
##  7 " ice + C…        6.91        11.5      0.004        0.888 model         4.15
##  8 " Chla + …        6.91         9.48     0.003        0.729 model         4.15
##  9 " ice + Z…        7.41        11.2      0.001        0.861 model         4.65
## 10 " ZML + i…        7.41         9.30     0.004        0.716 model         4.65
## # … with 146 more rows, and 1 more variable: Relative Likelihood <dbl>
#Best model: Time
#If time removed: NO2_NO3

#PP activity
# Model selection restricted to the two primary-production columns
# (Mixo_PP_potential, Phyto_PP) of the scaled activity matrix.
AICc.table.all(
  c("ice", "airT", "ZML", "oxygen", "salinity", "beamTrans", "NO2_NO3", "fluorescence"),
  matrix.char = vegdist(
    PCA_debug_df_scale_noStA[, c("Mixo_PP_potential", "Phyto_PP")],
    "euclidean"
  ),
  perm = 999,
  method = "euclidean",
  df = ant_community_final_PP_functional_env_grp_noLATLONG,
  comb.incl = c(1, 2, 3)
)
## [1] 1
## [1] 2
## [1] 3
## # A tibble: 400 x 8
##    variables  AICc.values `Pseudo-_F_` `p-value` `Var Explnd` Model `Delta AICc`
##    <chr>            <dbl>        <dbl>     <dbl>        <dbl> <chr>        <dbl>
##  1 " ice"           7.89          7.80     0.006        0.358 model        10.6 
##  2 " airT"          3.75         14.2      0.009        0.504 model         6.49
##  3 " ZML"           8.79          6.62     0.013        0.321 model        11.5 
##  4 " oxygen"        6.68          9.52     0.005        0.405 model         9.41
##  5 " salinit…      12.6           2.24     0.141        0.138 model        15.3 
##  6 " beamTra…       5.67         11.1      0.001        0.441 model         8.41
##  7 " NO2_NO3"       9.49          5.73     0.013        0.291 model        12.2 
##  8 " fluores…       8.51          6.98     0.012        0.333 model        11.2 
##  9 " ice + a…      -0.623        15.0      0.002        1.15  model         2.11
## 10 " airT + …      -0.623        21.1      0.001        1.62  model         2.11
## # … with 390 more rows, and 1 more variable: Relative Likelihood <dbl>
#Oxygen, airT, beamTrans

#Mixo only activity
# Model selection restricted to the two mixotroph columns
# (MNAN_C_removed, Mixo_PP_potential) of the scaled activity matrix.
AICc.table.all(
  c("ice", "ZML", "oxygen", "Time", "NO2_NO3"),
  matrix.char = vegdist(
    PCA_debug_df_scale_noStA[, c("MNAN_C_removed", "Mixo_PP_potential")],
    "euclidean"
  ),
  perm = 999,
  method = "euclidean",
  df = ant_community_final_PP_functional_env_grp_noLATLONG,
  comb.incl = c(1, 2, 3)
)
## [1] 1
## [1] 2
## [1] 3
## # A tibble: 85 x 8
##    variables  AICc.values `Pseudo-_F_` `p-value` `Var Explnd` Model `Delta AICc`
##    <chr>            <dbl>        <dbl>     <dbl>        <dbl> <chr>        <dbl>
##  1 " ice"            6.54         9.73     0.001        0.410 model         0   
##  2 " ZML"           11.1          3.86     0.032        0.216 model         4.55
##  3 " oxygen"         8.20         7.39     0.003        0.346 model         1.66
##  4 " Time"          10.2          4.86     0.013        0.258 model         3.67
##  5 " NO2_NO3"        9.56         5.65     0.01         0.287 model         3.02
##  6 " ice + Z…        9.55         9.07     0.002        0.698 model         3.02
##  7 " ZML + i…        9.55         4.78     0.011        0.367 model         3.02
##  8 " ice + o…        7.90        10.1      0.001        0.774 model         1.36
##  9 " oxygen …        7.90         8.48     0.002        0.652 model         1.36
## 10 " ice + T…        8.19         9.88     0.001        0.760 model         1.65
## # … with 75 more rows, and 1 more variable: Relative Likelihood <dbl>
#Best model: ice

#Adonis models after in respective order
#All activity
# PERMANOVA of the Euclidean distances between samples (all scaled activity
# variables) against ice + ZML + Time; terms are tested sequentially.
adonis2(
  vegdist(PCA_debug_df_scale_noStA, "euclidean") ~ ice + ZML + Time,
  data = ant_community_final_PP_functional_env_grp_noLATLONG,
  # Full argument name: `perm` only worked through partial argument matching.
  permutations = 9999,
  method = "euclidean"
)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = vegdist(PCA_debug_df_scale_noStA, "euclidean") ~ ice + ZML + Time, data = ant_community_final_PP_functional_env_grp_noLATLONG, permutations = 9999, method = "euclidean")
##          Df SumOfSqs      R2       F Pr(>F)    
## ice       1   24.017 0.40028 12.7576 0.0001 ***
## ZML       1    3.333 0.05555  1.7706 0.1709    
## Time      1   10.060 0.16766  5.3438 0.0037 ** 
## Residual 12   22.590 0.37651                   
## Total    15   60.000 1.00000                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Bacterivorous activity
# PERMANOVA of the Euclidean distances computed from the two carbon-removal
# columns (MNAN_C_removed, HNAN_C_removed) against Time.
adonis2(
  vegdist(
    PCA_debug_df_scale_noStA[, c("MNAN_C_removed", "HNAN_C_removed")],
    "euclidean"
  ) ~ Time,
  data = ant_community_final_PP_functional_env_grp_noLATLONG,
  # Full argument name: `perm` only worked through partial argument matching.
  permutations = 9999,
  method = "euclidean"
)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = vegdist(PCA_debug_df_scale_noStA[, c("MNAN_C_removed", "HNAN_C_removed")], "euclidean") ~ Time, data = ant_community_final_PP_functional_env_grp_noLATLONG, permutations = 9999, method = "euclidean")
##          Df SumOfSqs      R2      F Pr(>F)    
## Time      1   16.025 0.53416 16.053  2e-04 ***
## Residual 14   13.975 0.46584                  
## Total    15   30.000 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#PP activity
# PERMANOVA of the Euclidean distances computed from the two
# primary-production columns (Mixo_PP_potential, Phyto_PP) against
# oxygen + airT + beamTrans; terms are tested sequentially.
adonis2(
  vegdist(
    PCA_debug_df_scale_noStA[, c("Mixo_PP_potential", "Phyto_PP")],
    "euclidean"
  ) ~ oxygen + airT + beamTrans,
  data = ant_community_final_PP_functional_env_grp_noLATLONG,
  # Full argument name: `perm` only worked through partial argument matching.
  permutations = 9999,
  method = "euclidean"
)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = vegdist(PCA_debug_df_scale_noStA[, c("Mixo_PP_potential", "Phyto_PP")], "euclidean") ~ oxygen + airT + beamTrans, data = ant_community_final_PP_functional_env_grp_noLATLONG, permutations = 9999, method = "euclidean")
##           Df SumOfSqs      R2       F Pr(>F)    
## oxygen     1  12.1463 0.40488 22.3699 0.0001 ***
## airT       1   9.3866 0.31289 17.2874 0.0010 ***
## beamTrans  1   1.9515 0.06505  3.5941 0.0606 .  
## Residual  12   6.5157 0.21719                   
## Total     15  30.0000 1.00000                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Mixo only activity
# PERMANOVA of the Euclidean distances computed from the two mixotroph columns
# (MNAN_C_removed, Mixo_PP_potential) against ice.
adonis2(
  vegdist(
    PCA_debug_df_scale_noStA[, c("MNAN_C_removed", "Mixo_PP_potential")],
    "euclidean"
  ) ~ ice,
  data = ant_community_final_PP_functional_env_grp_noLATLONG,
  # Full argument name: `perm` only worked through partial argument matching.
  permutations = 9999,
  method = "euclidean"
)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = vegdist(PCA_debug_df_scale_noStA[, c("MNAN_C_removed", "Mixo_PP_potential")], "euclidean") ~ ice, data = ant_community_final_PP_functional_env_grp_noLATLONG, permutations = 9999, method = "euclidean")
##          Df SumOfSqs      R2     F Pr(>F)    
## ice       1     12.3 0.41001 9.729  4e-04 ***
## Residual 14     17.7 0.58999                 
## Total    15     30.0 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Quick investigation of all activity and all abundance on the PC2/PC3 axes
#want to see if the important vars changes and how they change
#replots of the 2 first then a look into the envfits
# Activity PCA: refit the environmental variables on axes 2 and 3.
PCA_debud_prcomp_all_activity_envfit_PC23 <- envfit(
  PCA_debud_prcomp_all_activity ~ .,
  ant_community_final_PP_functional_env_grp_noLATLONG,
  perm = 999,
  choices = 2:3
)

# Fitted vectors as a data frame, multiplied by 2 for plotting.
PCA_debud_prcomp_all_activity_envfit_PC23_scores <- as.data.frame(
  scores(PCA_debud_prcomp_all_activity_envfit_PC23, "vectors")
) * 2
#Var number: 3
#airT - attempted to refit w/o airT and it didn't change the fit for any of the vars there is little to no significant pattern for activity vars. need to cross reference correlation matrix table...
# Abundance PCA: same refit on axes 2 and 3.
functional_prcomp_grp_remST_remREDUND_noLATLONG_fit_PC23 <- envfit(
  functional_prcomp_grp_remST ~ .,
  ant_community_final_calc_env_grp_remST_remREDUND_noLATLONG,
  perm = 999,
  choices = 2:3
)
#Var number: 4,7,12,15
functional_prcomp_grp_remST_remREDUND_noLATLONG_fit_PC23_scores <- as.data.frame(
  scores(functional_prcomp_grp_remST_remREDUND_noLATLONG_fit_PC23, "vectors")
) * 2
#Abundance Plot
# Envfit vectors flagged as important for the abundance PCA on PC2/PC3
# (rows 4, 7, 12, 15 of the fitted-vector table).
functional_prcomp_grp_remST_remREDUND_noLATLONG_fit_PC23_sig <-
  functional_prcomp_grp_remST_remREDUND_noLATLONG_fit_PC23_scores[c(4, 7, 12, 15), ]

# Biplot on PC2/PC3: samples (coloured by Group, shaped by depth), variable
# loadings in red, and the selected environmental vectors as black arrows.
PCA_debud_ALL_abundance_PCA_grouped_plot_PC23 <- ggplot(
  data = functional_prcomp_grp_remST_scores_noRed,
  aes(x = PC2, y = PC3)
) +
  geom_point(aes(color = Group, shape = depth), size = 2) +
  geom_segment(
    data = functional_prcomp_grp_remST_species_noRed,
    aes(x = 0, y = 0, xend = PC2, yend = PC3),
    color = "red"
  ) +
  geom_segment(
    data = functional_prcomp_grp_remST_remREDUND_noLATLONG_fit_PC23_sig,
    aes(
      x = 0, y = 0,
      xend = functional_prcomp_grp_remST_remREDUND_noLATLONG_fit_PC23_sig[, 1],
      yend = functional_prcomp_grp_remST_remREDUND_noLATLONG_fit_PC23_sig[, 2]
    ),
    arrow = arrow(length = unit(0.1, "inches"))
  ) +
  geom_text_repel(
    data = functional_prcomp_grp_remST_remREDUND_noLATLONG_fit_PC23_sig,
    aes(
      x = functional_prcomp_grp_remST_remREDUND_noLATLONG_fit_PC23_sig[, 1],
      y = functional_prcomp_grp_remST_remREDUND_noLATLONG_fit_PC23_sig[, 2] - 0.04
    ),
    fontface = "bold",
    label = rownames(functional_prcomp_grp_remST_remREDUND_noLATLONG_fit_PC23_sig)
  ) +
  geom_text_repel(
    aes(x = PC2, y = PC3 + 0.1),
    size = 3,
    label = rownames(functional_prcomp_grp_remST_scores_noRed),
    color = "grey3"
  ) +
  geom_text_repel(
    data = functional_prcomp_grp_remST_species_noRed,
    aes(x = PC2, y = PC3 + 0.1),
    size = 3,
    label = rownames(functional_prcomp_grp_remST_species_noRed),
    color = "red"
  ) +
  labs(shape = "Depth") +
  xlab("PC2 (~20%)") +
  ylab("PC3 (~10%)") +
  theme_bw() +
  theme(panel.grid = element_blank())

PCA_debud_ALL_abundance_PCA_grouped_plot_PC23

#Activity
# Environmental vector to draw on the PC2/PC3 activity biplot: row 3 of the
# PC2/PC3 envfit table. The previous version took row 3 from the PC1/PC2 table
# (PCA_debud_prcomp_all_activity_envfit_scores), so the arrow was drawn with
# PC1/PC2 coordinates on PC2/PC3 axes and the PC2/PC3 fit was never used.
PCA_debud_prcomp_all_activity_envfit_PC23_sig <-
  PCA_debud_prcomp_all_activity_envfit_PC23_scores[3, ]

# Biplot on PC2/PC3: samples (coloured by Group, shaped by depth), the selected
# environmental vector as a black arrow, and variable loadings in red.
PCA_debud_ALL_activity_PCA_grouped_plot_PC23 <- ggplot(
  data = PCA_debud_prcomp_all_activity_scores,
  aes(x = PC2, y = PC3)
) +
  geom_point(aes(color = Group, shape = depth), size = 2) +
  geom_segment(
    data = PCA_debud_prcomp_all_activity_envfit_PC23_sig,
    aes(
      x = 0, y = 0,
      xend = PCA_debud_prcomp_all_activity_envfit_PC23_sig[, 1],
      yend = PCA_debud_prcomp_all_activity_envfit_PC23_sig[, 2]
    ),
    arrow = arrow(length = unit(0.1, "inches"))
  ) +
  geom_text_repel(
    data = PCA_debud_prcomp_all_activity_envfit_PC23_sig,
    aes(
      x = PCA_debud_prcomp_all_activity_envfit_PC23_sig[, 1],
      y = PCA_debud_prcomp_all_activity_envfit_PC23_sig[, 2] - 0.04
    ),
    fontface = "bold",
    label = rownames(PCA_debud_prcomp_all_activity_envfit_PC23_sig)
  ) +
  geom_text(
    aes(x = PC2, y = PC3 - 0.2),
    size = 3,
    label = rownames(PCA_debud_prcomp_all_activity_scores),
    color = "grey3"
  ) +
  xlab("PC2 (~20%)") +
  ylab("PC3 (~10%)") +
  theme_bw() +
  theme(panel.grid = element_blank()) +
  geom_segment(
    data = PCA_debud_prcomp_all_activity_species,
    aes(x = 0, y = 0, xend = PC2, yend = PC3),
    color = "red"
  ) +
  geom_text(
    data = PCA_debud_prcomp_all_activity_species,
    aes(x = PC2, y = PC3),
    color = "red",
    label = rownames(PCA_debud_prcomp_all_activity_species)
  )

PCA_debud_ALL_activity_PCA_grouped_plot_PC23

#Saving the plots for manuscript
# Abundance figure first, then activity; each is written as
# "<plot object name>.pdf" into the manuscript figure folder.
for (pc23_plot_name in c(
  "PCA_debud_ALL_abundance_PCA_grouped_plot_PC23",
  "PCA_debud_ALL_activity_PCA_grouped_plot_PC23"
)) {
  ggsave(
    paste0(pc23_plot_name, ".pdf"),
    plot = get(pc23_plot_name),
    path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
  )
}
## Saving 7 x 5 in image
## Saving 7 x 5 in image
#Testing new envfit
#envfit_vars_selection_table_PC2impact(PCA_debud_prcomp_all_activity_envfit, "testing", "testing")

#envfit_vars_selection_table(PCA_debud_prcomp_all_activity_envfit, "testing", "testing")

#Making adonis models for the 2 final PCAs
#abundance
# Model selection on the Euclidean distances between rows 3-18 of the
# abundance table, with four candidate predictors.
AICc.table.all(
  c("PAR", "Water_Temp", "oxygen_saturation", "depth_m"),
  matrix.char = vegdist(
    ant_community_final_functional_grp_fixed_remST[3:18, ],
    "euclidean"
  ),
  perm = 999,
  method = "euclidean",
  df = ant_community_final_PP_functional_env_grp_noLATLONG,
  comb.incl = c(1, 2, 3)
)
## [1] 1
## [1] 2
## [1] 3
## # A tibble: 40 x 8
##    variables  AICc.values `Pseudo-_F_` `p-value` `Var Explnd` Model `Delta AICc`
##    <chr>            <dbl>        <dbl>     <dbl>        <dbl> <chr>        <dbl>
##  1 " PAR"            235.       1.56       0.198      0.100   model         2.17
##  2 " Water_T…        236.       0.0523     0.82       0.00372 model         3.80
##  3 " oxygen_…        236.       0.324      0.576      0.0226  model         3.50
##  4 " depth_m"        236.       0.663      0.43       0.0452  model         3.12
##  5 " PAR + W…        238.       1.47       0.197      0.113   model         5.00
##  6 " Water_T…        238.       0.0547     0.817      0.00421 model         5.00
##  7 " PAR + o…        237.       1.52       0.215      0.117   model         4.49
##  8 " oxygen_…        237.       0.343      0.567      0.0264  model         4.49
##  9 " PAR + d…        238.       1.46       0.206      0.113   model         5.12
## 10 " depth_m…        238.       0.659      0.434      0.0507  model         5.12
## # … with 30 more rows, and 1 more variable: Relative Likelihood <dbl>
#Best model: Water_Temp, oxygen_saturation
#activity
# Single-predictor run (airT only) on the scaled activity distance matrix.
AICc.table.all(
  c("airT"),
  matrix.char = vegdist(PCA_debug_df_scale_noStA, "euclidean"),
  perm = 999,
  method = "euclidean",
  df = ant_community_final_PP_functional_env_grp_noLATLONG,
  comb.incl = c(1)
)
## [1] 1
## # A tibble: 1 x 8
##   variables AICc.values `Pseudo-_F_` `p-value` `Var Explnd` Model `Delta AICc`
##   <chr>           <dbl>        <dbl>     <dbl>        <dbl> <chr>        <dbl>
## 1 " airT"          21.3         4.84     0.014        0.257 model            0
## # … with 1 more variable: Relative Likelihood <dbl>
#Best model: airT
#Making tables for the envfits and adonis models

#A version for percell activity
# Names of the four per-cell activity response columns.
response_vars_all_activity_percell <- c(
  "MNAN_C_removed_percell",
  "HNAN_C_removed_percell",
  "MNAN_C_PP_percell_pot",
  "PNAN_C_PP_percell"
)
# Keep only those columns; they come out in the data frame's own column order.
PCA_debug_df_percell <- ant_community_final_PP_calc_grp[
  ,
  colnames(ant_community_final_PP_calc_grp) %in% response_vars_all_activity_percell
]

# Centre and scale every column with scale()'s defaults.
PCA_debug_df_scale_percell <- scale(PCA_debug_df_percell)

print(PCA_debug_df_scale_percell)
##       MNAN_C_removed_percell HNAN_C_removed_percell MNAN_C_PP_percell_pot
##  [1,]            -1.75182216            -1.93162820                    NA
##  [2,]            -1.32248280            -0.04154172                    NA
##  [3,]            -0.33842081            -0.52594277           0.120790082
##  [4,]             0.19361835            -0.59339239          -0.574762271
##  [5,]             0.15329130            -0.48759716          -0.429870151
##  [6,]            -0.77492121            -0.70344878          -0.247476241
##  [7,]            -0.37323690            -0.70155369          -0.370640173
##  [8,]            -0.19123919             0.03472923          -0.706770376
##  [9,]             2.20547824             0.86982232          -0.007523391
## [10,]            -0.74645300            -0.52537110           3.585942342
## [11,]             0.02097476            -0.55082164           0.480338345
## [12,]            -0.33352471            -1.15248434          -0.306425141
## [13,]             0.72766851             1.03252758          -0.179198519
## [14,]             1.00812061             0.14794835          -0.595570260
## [15,]             1.66018783             1.11466651          -0.181504269
## [16,]             0.29926682             2.14516348          -0.194700430
## [17,]            -0.99619750             1.12087904          -0.032425642
## [18,]             0.55969186             0.74804529          -0.360203904
##       PNAN_C_PP_percell
##  [1,]                NA
##  [2,]                NA
##  [3,]         0.5507870
##  [4,]        -0.7518542
##  [5,]        -0.3675804
##  [6,]        -0.5980552
##  [7,]        -0.5962084
##  [8,]        -0.9079861
##  [9,]        -1.0878795
## [10,]        -0.1562907
## [11,]         3.2726286
## [12,]         0.3476176
## [13,]        -0.1393837
## [14,]         0.3156793
## [15,]        -0.1303675
## [16,]         0.3688830
## [17,]        -0.3250096
## [18,]         0.2050197
## attr(,"scaled:center")
## MNAN_C_removed_percell HNAN_C_removed_percell  MNAN_C_PP_percell_pot 
##           2.272968e-06           3.956760e-07           2.272419e-05 
##      PNAN_C_PP_percell 
##           1.525851e-07 
## attr(,"scaled:scale")
## MNAN_C_removed_percell HNAN_C_removed_percell  MNAN_C_PP_percell_pot 
##           8.913695e-07           1.557321e-07           2.993652e-05 
##      PNAN_C_PP_percell 
##           1.256198e-07
# Same scaling on rows 3-18 only. The first two rows have NA in both PP
# columns (see the printout above); presumably these are the station A samples
# that the "noStA" suffix refers to.
PCA_debug_df_scale_noStA_percell <- scale(PCA_debug_df_percell[3:18, ])

print(PCA_debug_df_scale_noStA_percell)
##       MNAN_C_removed_percell HNAN_C_removed_percell MNAN_C_PP_percell_pot
##  [1,]           -0.603552784            -0.69678726           0.120790082
##  [2,]            0.001677103            -0.76917369          -0.574762271
##  [3,]           -0.044197591            -0.65563504          -0.429870151
##  [4,]           -1.100100984            -0.88728538          -0.247476241
##  [5,]           -0.643158398            -0.88525158          -0.370640173
##  [6,]           -0.436123919            -0.09507829          -0.706770376
##  [7,]            2.290301370             0.80113732          -0.007523391
##  [8,]           -1.067716506            -0.69617375           3.585942342
##  [9,]           -0.194716452            -0.72348708           0.480338345
## [10,]           -0.597983147            -1.36918696          -0.306425141
## [11,]            0.609194636             0.97575137          -0.179198519
## [12,]            0.928227530             0.02642761          -0.595570260
## [13,]            1.669997305             1.06390226          -0.181504269
## [14,]            0.121859258             2.16982384          -0.194700430
## [15,]           -1.351817461             1.07056950          -0.032425642
## [16,]            0.418110039             0.67044712          -0.360203904
##       PNAN_C_PP_percell
##  [1,]         0.5507870
##  [2,]        -0.7518542
##  [3,]        -0.3675804
##  [4,]        -0.5980552
##  [5,]        -0.5962084
##  [6,]        -0.9079861
##  [7,]        -1.0878795
##  [8,]        -0.1562907
##  [9,]         3.2726286
## [10,]         0.3476176
## [11,]        -0.1393837
## [12,]         0.3156793
## [13,]        -0.1303675
## [14,]         0.3688830
## [15,]        -0.3250096
## [16,]         0.2050197
## attr(,"scaled:center")
## MNAN_C_removed_percell HNAN_C_removed_percell  MNAN_C_PP_percell_pot 
##           2.444239e-06           4.148814e-07           2.272419e-05 
##      PNAN_C_PP_percell 
##           1.525851e-07 
## attr(,"scaled:scale")
## MNAN_C_removed_percell HNAN_C_removed_percell  MNAN_C_PP_percell_pot 
##           7.835758e-07           1.451111e-07           2.993652e-05 
##      PNAN_C_PP_percell 
##           1.256198e-07
# Label the scaled matrices with the grouped sample names; the reduced matrix
# takes entries 3-18 so its labels line up with the rows it kept.
rownames(PCA_debug_df_scale_percell) <- grouped_rownames_remST

rownames(PCA_debug_df_scale_noStA_percell) <- grouped_rownames_remST[3:18]

#prcomps
# PCA on all four per-cell activity variables.
PCA_debud_prcomp_all_activity_percell <- prcomp(PCA_debug_df_scale_noStA_percell)

# PCA on the two MNAN (mixotroph) per-cell variables only.
PCA_debud_prcomp_mixo_only_act_percell <- prcomp(
  PCA_debug_df_scale_noStA_percell[, c("MNAN_C_removed_percell", "MNAN_C_PP_percell_pot")]
)

#Envfits
# Both fits use the environmental table without the "_noLATLONG" suffix.
PCA_debud_prcomp_all_activity_envfit_lat_long_percell <- envfit(
  PCA_debud_prcomp_all_activity_percell ~ .,
  ant_community_final_PP_functional_env_grp,
  perm = 999
)
#Important vars row#: 2,5,6,8,9,11,13:15,18,20,21
#Important vars name:

PCA_debud_prcomp_mixo_only_act_envfit_percell <- envfit(
  PCA_debud_prcomp_mixo_only_act_percell ~ .,
  ant_community_final_PP_functional_env_grp,
  perm = 999
)
#Important vars row#: 2,9,21
#Important vars name:

#Make plots I wont have model selection done in time
#All activity plot
# Site scores and variable loadings (loadings multiplied by 1.5 for plotting).
PCA_debud_prcomp_all_activity_percell_scores <- as.data.frame(scores(PCA_debud_prcomp_all_activity_percell))
PCA_debud_prcomp_all_activity_percell_species <- as.data.frame(scores(PCA_debud_prcomp_all_activity_percell, display = "species")) * 1.5
# Add the plotting labels under explicit names. Assigning by position
# (`[, 5] <-`) only produced columns called "depth"/"Group" when the right-hand
# side was a one-column tibble; with a plain data.frame they became "V5"/"V6"
# and aes(color = Group, shape = depth) could not find them.
PCA_debud_prcomp_all_activity_percell_scores$depth <- ant_community_final_PP_calc_grp[["depth"]][3:18]
PCA_debud_prcomp_all_activity_percell_scores$Group <- ant_community_final_PP_calc_grp[["Group"]][3:18]

# Fitted environmental vectors (multiplied by 2 for plotting).
PCA_debud_prcomp_all_activity_percell_envfit_scores <- as.data.frame(scores(PCA_debud_prcomp_all_activity_envfit_lat_long_percell, "vectors")) * 2

# Only the envfit rows noted as important are drawn as arrows.
PCA_debud_prcomp_all_activity_percell_envfit_sig <-
  PCA_debud_prcomp_all_activity_percell_envfit_scores[c(2, 5, 6, 8, 9, 11, 13:15, 18, 20, 21), ]

# NOTE(review): the axis percentages are hard-coded and identical to other
# plots in this file - confirm they match this PCA.
PCA_debud_ALL_acivity_PCA_grouped_percell_plot <- ggplot(
  data = PCA_debud_prcomp_all_activity_percell_scores,
  aes(x = PC1, y = PC2)
) +
  geom_point(aes(color = Group, shape = depth), size = 2) +
  # The arrow layer now takes the envfit vectors as its data. It previously
  # indexed the 16-row site-score table with envfit row numbers (up to 21),
  # which handed the layer the wrong object, including all-NA rows.
  geom_segment(
    data = PCA_debud_prcomp_all_activity_percell_envfit_sig,
    aes(
      x = 0, y = 0,
      xend = PCA_debud_prcomp_all_activity_percell_envfit_sig[, 1],
      yend = PCA_debud_prcomp_all_activity_percell_envfit_sig[, 2]
    ),
    arrow = arrow(length = unit(0.1, "inches"))
  ) +
  geom_text_repel(
    data = PCA_debud_prcomp_all_activity_percell_envfit_sig,
    aes(
      x = PCA_debud_prcomp_all_activity_percell_envfit_sig[, 1],
      y = PCA_debud_prcomp_all_activity_percell_envfit_sig[, 2] - 0.04
    ),
    fontface = "bold",
    label = rownames(PCA_debud_prcomp_all_activity_percell_envfit_sig)
  ) +
  geom_text(
    aes(x = PC1, y = PC2 - 0.2),
    size = 3,
    label = rownames(PCA_debud_prcomp_all_activity_percell_scores),
    color = "grey3"
  ) +
  xlab("PC1 (51%)") +
  ylab("PC2 (31%)") +
  theme_bw() +
  theme(panel.grid = element_blank()) +
  geom_segment(
    data = PCA_debud_prcomp_all_activity_percell_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "red"
  ) +
  geom_text(
    data = PCA_debud_prcomp_all_activity_percell_species,
    aes(x = PC1, y = PC2),
    color = "red",
    label = rownames(PCA_debud_prcomp_all_activity_percell_species)
  )

PCA_debud_ALL_acivity_PCA_grouped_percell_plot

#Mixo only plot
# Site scores and variable loadings (loadings multiplied by 1.5 for plotting).
PCA_debud_prcomp_mixo_only_act_percell_scores <- as.data.frame(scores(PCA_debud_prcomp_mixo_only_act_percell))
PCA_debud_prcomp_mixo_only_act_percell_species <- as.data.frame(scores(PCA_debud_prcomp_mixo_only_act_percell, display = "species")) * 1.5
# Add the plotting labels under explicit names. Assigning by position
# (`[, 3] <-`) only produced columns called "depth"/"Group" when the right-hand
# side was a one-column tibble; with a plain data.frame they became "V3"/"V4"
# and aes(color = Group, shape = depth) could not find them.
PCA_debud_prcomp_mixo_only_act_percell_scores$depth <- ant_community_final_PP_calc_grp[["depth"]][3:18]
PCA_debud_prcomp_mixo_only_act_percell_scores$Group <- ant_community_final_PP_calc_grp[["Group"]][3:18]

# Fitted environmental vectors (multiplied by 2 for plotting).
PCA_debud_prcomp_mixo_only_act_envfit_percell_envfit_scores <- as.data.frame(scores(PCA_debud_prcomp_mixo_only_act_envfit_percell, "vectors")) * 2

# Only the envfit rows noted as important (2, 9, 21) are drawn as arrows.
PCA_debud_prcomp_mixo_only_act_envfit_percell_envfit_sig <-
  PCA_debud_prcomp_mixo_only_act_envfit_percell_envfit_scores[c(2, 9, 21), ]

# NOTE(review): the axis percentages are hard-coded and identical to other
# plots in this file - confirm they match this PCA.
PCA_debud_mixo_only_act_PCA_grouped_percell_plot <- ggplot(
  data = PCA_debud_prcomp_mixo_only_act_percell_scores,
  aes(x = PC1, y = PC2)
) +
  geom_point(aes(color = Group, shape = depth), size = 2) +
  geom_segment(
    data = PCA_debud_prcomp_mixo_only_act_envfit_percell_envfit_sig,
    aes(
      x = 0, y = 0,
      xend = PCA_debud_prcomp_mixo_only_act_envfit_percell_envfit_sig[, 1],
      yend = PCA_debud_prcomp_mixo_only_act_envfit_percell_envfit_sig[, 2]
    ),
    arrow = arrow(length = unit(0.1, "inches"))
  ) +
  geom_text_repel(
    data = PCA_debud_prcomp_mixo_only_act_envfit_percell_envfit_sig,
    aes(
      x = PCA_debud_prcomp_mixo_only_act_envfit_percell_envfit_sig[, 1],
      y = PCA_debud_prcomp_mixo_only_act_envfit_percell_envfit_sig[, 2] - 0.04
    ),
    fontface = "bold",
    label = rownames(PCA_debud_prcomp_mixo_only_act_envfit_percell_envfit_sig)
  ) +
  geom_text(
    aes(x = PC1, y = PC2 - 0.2),
    size = 3,
    label = rownames(PCA_debud_prcomp_mixo_only_act_percell_scores),
    color = "grey3"
  ) +
  xlab("PC1 (51%)") +
  ylab("PC2 (31%)") +
  theme_bw() +
  theme(panel.grid = element_blank()) +
  geom_segment(
    data = PCA_debud_prcomp_mixo_only_act_percell_species,
    aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "red"
  ) +
  geom_text(
    data = PCA_debud_prcomp_mixo_only_act_percell_species,
    aes(x = PC1, y = PC2),
    color = "red",
    label = rownames(PCA_debud_prcomp_mixo_only_act_percell_species)
  )

PCA_debud_mixo_only_act_PCA_grouped_percell_plot

PCA_debud_ALL_acivity_PCA_grouped_percell_plot

#Summary of PCA2/3 ##Activity Summary PC1 is the increase/decrease of total activity, which accounts for the large majority of variation (at least 70%). Initial models selected vars that are going to be most influential along the PC1 axis (mostly driven by the latitudinal gradient). To attempt to decouple from the latitudinal gradient, I analyzed the data using the PC2 and PC3 axes. PC2 is the difference between heterotrophic and PP-originated carbon production/usage. PC3 is then the difference within each of those categories. Honestly, there isn’t much to say here: when we hone in on the secondary PC axes, there is no meaningful correlation within the dataset. This is due to the orientation of MNAN activity within the PCA, which doesn’t follow the rest of the trends viewed in the data. Something I actually should’ve guessed based on the results when we split the PCAs. HNAN and PNAN activity are negatively correlated here, but positively correlated with similar env vars…

I need to think on this more and I’m currently getting stuck in a loop.

##Abundance Summary PC1 is the increase/decrease of total community abundance, which accounts for the vast majority of variation (at least 70%). Initial models had the same drawbacks as for activity. PC2 is the difference

##What I have decided…. The plots of PC1 and PC2 will be used as supplements, as they explain most of the variance in the data. I will use the plots on PC2 and PC3 in the main plots, as they look to distinguish the difference between the groups and this is what I want to emphasize. There’s almost nothing significant when it comes to activity levels. There are a few important vars when it comes to abundance: PAR, depth, water_temp, oxygen saturation.

# Subsets of the grouped data by sampling depth and by region.
DCM_filter_all <- filter(ant_community_final_PP_calc_grp, depth == "DCM")
Surf_filter_all <- filter(ant_community_final_PP_calc_grp, depth == "Surf")
# North = every group that is neither "Maguerite" nor "Offshore". The two
# tests must be joined with `&`: with `|` every row passed (no value equals
# both strings), so the "North" subset was the whole table. This matches the
# "North" rule used for Group_final further down.
North_filter_all <- filter(
  ant_community_final_PP_calc_grp,
  Group != "Maguerite" & Group != "Offshore"
)
South_filter_all <- filter(ant_community_final_PP_calc_grp, Group == "Maguerite")

#DCM_summary_df <- matrix(,nrow = 3, ncol = 11, dimnames = list(c("Min", "Max", "Avg"),
#                                                              c("PNAN_mL", "MNAN_mL","HNAN_mL", "PNAN_PP","MNAN_PP", "MNAN_C_removed","MNAN_C_removed_act",  "MNAN_SS_day","HNAN_C_removed","HNAN_C_removed_act","HNAN_SS_day")
#))

#Making a new df for a summary table to pull from
#Make a new column with the new groups on it
# Working copy for the summary tables; the source table is left untouched.
ant_community_final_PP_calc_grp_for_sum <- ant_community_final_PP_calc_grp

# Collapse `Group` into three regions:
#   Maguerite -> "South", Offshore -> "Offshore", every other group -> "North".
ant_community_final_PP_calc_grp_for_sum$Group_final <- ifelse(
  ant_community_final_PP_calc_grp_for_sum$Group == "Maguerite",
  "South",
  ifelse(
    ant_community_final_PP_calc_grp_for_sum$Group == "Offshore",
    "Offshore",
    "North"
  )
)

# Need to fix bprod: rescale it by 24 / 1000000 * 1500.
# NOTE(review): the meaning of the three constants is not stated here --
# presumably a per-hour -> per-day and unit/carbon conversion; confirm.
ant_community_final_PP_calc_grp_for_sum[["bprod"]] <-
  ant_community_final_PP_calc_grp_for_sum[["bprod"]] * 24 / 1000000 * 1500

# Mean and sd of every numeric column per region (Group_final), NAs dropped.
ant_community_final_PP_calc_grp_for_sum %>%
  group_by(Group_final) %>%
  summarise_if(is.numeric, .funs = list(mean = mean, sd = sd), na.rm = TRUE)
## # A tibble: 3 x 219
##   Group_final Lat_deg_mean Lat_min_mean Lat_mean Long_deg_mean Long_min_mean
##   <chr>              <dbl>        <dbl>    <dbl>         <dbl>         <dbl>
## 1 North               64.5         32.5     65.0          63.5          19.8
## 2 Offshore            65            0.4     65.0          66            22.5
## 3 South               67.2         36.5     67.9          67.5          37.9
## # … with 213 more variables: Long_mean <dbl>, bottom_mean <dbl>,
## #   ice_mean <dbl>, airT_mean <dbl>, depth_m_mean <dbl>, Chla_mean <dbl>,
## #   prDM_mean <dbl>, depSM_mean <dbl>, ZML_T_m..0.2ºC._mean <dbl>,
## #   ZML_mean <dbl>, ZML_TSP_m..0.023Kg.m3._mean <dbl>, ZE_mean <dbl>,
## #   ZCM_m..maxFluo._mean <dbl>, Water_Temp_mean <dbl>, conductivity_mean <dbl>,
## #   waterT2_mean <dbl>, conductivity2_mean <dbl>, oxygen_mean <dbl>,
## #   oxygen2_mean <dbl>, fluorescence_mean <dbl>, beamTrans_mean <dbl>,
## #   PAR_mean <dbl>, PAR2_mean <dbl>, latitude_mean <dbl>, longitude_mean <dbl>,
## #   Time_mean <dbl>, altM_mean <dbl>, spar_mean <dbl>, timeS_mean <dbl>,
## #   scan_mean <dbl>, salinity_mean <dbl>, salinity2_mean <dbl>,
## #   oxygen_saturation_mean <dbl>, nbin_mean <dbl>, bprod_mean <dbl>,
## #   pprod_Sun_mean <dbl>, pprod_PAR_mean <dbl>, pprod_Sun_20_mean <dbl>,
## #   Pprod_PAR_20_mean <dbl>, Pprod_PAR_20_stdev_mean <dbl>, NH4_mean <dbl>,
## #   NO2_NO3_mean <dbl>, PO4_mean <dbl>, Total_DIN_mean <dbl>,
## #   N_P_Ratio_mean <dbl>, Time_feeding_mean <dbl>, ANAN_mL_mean <dbl>,
## #   PNAN_mL_mean <dbl>, MNAN_mL_mean <dbl>, HNAN_mL_mean <dbl>,
## #   Bact_mL_mean <dbl>, usphere_mL_mean <dbl>, usphere_mixo_mean <dbl>,
## #   usphere_hetero_mean <dbl>, mixo_act_mean <dbl>, mixo_maybe_act_mean <dbl>,
## #   phyto_act_mean <dbl>, hetero_act_mean <dbl>, bprod_stdev_mean <dbl>,
## #   pprod_Sun_stdev_mean <dbl>, pprod_PAR_stdev_mean <dbl>, TNAN_mean <dbl>,
## #   PNAN_percent_mean <dbl>, MNAN_percent_mean <dbl>, HNAN_percent_mean <dbl>,
## #   ANAN_percent_mean <dbl>, bac_usphere_ratio_mean <dbl>,
## #   MNAN_bacterivory_mean <dbl>, HNAN_bacterivory_mean <dbl>,
## #   MNAN_SS_mean <dbl>, HNAN_SS_mean <dbl>, MNAN_SS_perday_mean <dbl>,
## #   HNAN_SS_perday_mean <dbl>, Total_SS_mean <dbl>, MNAN_SS_percent_mean <dbl>,
## #   HNAN_SS_percent_mean <dbl>, Total_act_mean <dbl>,
## #   mixo_act_percent_mean <dbl>, mixo_maybe_act_percent_mean <dbl>,
## #   phyto_act_percent_mean <dbl>, Mixo_PP_mean <dbl>, Mixo_maybe_PP_mean <dbl>,
## #   Phyto_PP_mean <dbl>, Mixo_PP_sd_mean <dbl>, Mixo_maybe_PP_sd_mean <dbl>,
## #   Phyto_PP_sd_mean <dbl>, Mixo_PP_potential_mean <dbl>,
## #   Mixo_PP_potential_sd_mean <dbl>, bac_biomass_avg_mean <dbl>,
## #   Carbon_biomass_perML_mean <dbl>, MNAN_C_removed_mean <dbl>,
## #   HNAN_C_removed_mean <dbl>, MNAN_C_removed_percell_mean <dbl>,
## #   HNAN_C_removed_percell_mean <dbl>, Mixo_PP_percell_mean <dbl>,
## #   MNAN_percent_bprod_removed_mean <dbl>,
## #   HNAN_percent_bprod_removed_mean <dbl>,
## #   hetero_act_percent_forfeed_mean <dbl>,
## #   hetero_act_feeding_C_redo_mean <dbl>, mixo_act_feeding_redo_mean <dbl>, …
# Same per-region mean/sd summary, restricted to surface ("Surf") samples.
ant_community_final_PP_calc_grp_for_sum %>%
  filter(depth == "Surf") %>%
  group_by(Group_final) %>%
  summarise_if(is.numeric, .funs = list(mean = mean, sd = sd), na.rm = TRUE)
## # A tibble: 3 x 219
##   Group_final Lat_deg_mean Lat_min_mean Lat_mean Long_deg_mean Long_min_mean
##   <chr>              <dbl>        <dbl>    <dbl>         <dbl>         <dbl>
## 1 North               64.5         32.5     65.0          63.5          19.8
## 2 Offshore            65            0.4     65.0          66            22.5
## 3 South               67.2         36.5     67.9          67.5          37.9
## # … with 213 more variables: Long_mean <dbl>, bottom_mean <dbl>,
## #   ice_mean <dbl>, airT_mean <dbl>, depth_m_mean <dbl>, Chla_mean <dbl>,
## #   prDM_mean <dbl>, depSM_mean <dbl>, ZML_T_m..0.2ºC._mean <dbl>,
## #   ZML_mean <dbl>, ZML_TSP_m..0.023Kg.m3._mean <dbl>, ZE_mean <dbl>,
## #   ZCM_m..maxFluo._mean <dbl>, Water_Temp_mean <dbl>, conductivity_mean <dbl>,
## #   waterT2_mean <dbl>, conductivity2_mean <dbl>, oxygen_mean <dbl>,
## #   oxygen2_mean <dbl>, fluorescence_mean <dbl>, beamTrans_mean <dbl>,
## #   PAR_mean <dbl>, PAR2_mean <dbl>, latitude_mean <dbl>, longitude_mean <dbl>,
## #   Time_mean <dbl>, altM_mean <dbl>, spar_mean <dbl>, timeS_mean <dbl>,
## #   scan_mean <dbl>, salinity_mean <dbl>, salinity2_mean <dbl>,
## #   oxygen_saturation_mean <dbl>, nbin_mean <dbl>, bprod_mean <dbl>,
## #   pprod_Sun_mean <dbl>, pprod_PAR_mean <dbl>, pprod_Sun_20_mean <dbl>,
## #   Pprod_PAR_20_mean <dbl>, Pprod_PAR_20_stdev_mean <dbl>, NH4_mean <dbl>,
## #   NO2_NO3_mean <dbl>, PO4_mean <dbl>, Total_DIN_mean <dbl>,
## #   N_P_Ratio_mean <dbl>, Time_feeding_mean <dbl>, ANAN_mL_mean <dbl>,
## #   PNAN_mL_mean <dbl>, MNAN_mL_mean <dbl>, HNAN_mL_mean <dbl>,
## #   Bact_mL_mean <dbl>, usphere_mL_mean <dbl>, usphere_mixo_mean <dbl>,
## #   usphere_hetero_mean <dbl>, mixo_act_mean <dbl>, mixo_maybe_act_mean <dbl>,
## #   phyto_act_mean <dbl>, hetero_act_mean <dbl>, bprod_stdev_mean <dbl>,
## #   pprod_Sun_stdev_mean <dbl>, pprod_PAR_stdev_mean <dbl>, TNAN_mean <dbl>,
## #   PNAN_percent_mean <dbl>, MNAN_percent_mean <dbl>, HNAN_percent_mean <dbl>,
## #   ANAN_percent_mean <dbl>, bac_usphere_ratio_mean <dbl>,
## #   MNAN_bacterivory_mean <dbl>, HNAN_bacterivory_mean <dbl>,
## #   MNAN_SS_mean <dbl>, HNAN_SS_mean <dbl>, MNAN_SS_perday_mean <dbl>,
## #   HNAN_SS_perday_mean <dbl>, Total_SS_mean <dbl>, MNAN_SS_percent_mean <dbl>,
## #   HNAN_SS_percent_mean <dbl>, Total_act_mean <dbl>,
## #   mixo_act_percent_mean <dbl>, mixo_maybe_act_percent_mean <dbl>,
## #   phyto_act_percent_mean <dbl>, Mixo_PP_mean <dbl>, Mixo_maybe_PP_mean <dbl>,
## #   Phyto_PP_mean <dbl>, Mixo_PP_sd_mean <dbl>, Mixo_maybe_PP_sd_mean <dbl>,
## #   Phyto_PP_sd_mean <dbl>, Mixo_PP_potential_mean <dbl>,
## #   Mixo_PP_potential_sd_mean <dbl>, bac_biomass_avg_mean <dbl>,
## #   Carbon_biomass_perML_mean <dbl>, MNAN_C_removed_mean <dbl>,
## #   HNAN_C_removed_mean <dbl>, MNAN_C_removed_percell_mean <dbl>,
## #   HNAN_C_removed_percell_mean <dbl>, Mixo_PP_percell_mean <dbl>,
## #   MNAN_percent_bprod_removed_mean <dbl>,
## #   HNAN_percent_bprod_removed_mean <dbl>,
## #   hetero_act_percent_forfeed_mean <dbl>,
## #   hetero_act_feeding_C_redo_mean <dbl>, mixo_act_feeding_redo_mean <dbl>, …
# Same per-region mean/sd summary, restricted to "DCM" samples.
ant_community_final_PP_calc_grp_for_sum %>%
  filter(depth == "DCM") %>%
  group_by(Group_final) %>%
  summarise_if(is.numeric, .funs = list(mean = mean, sd = sd), na.rm = TRUE)
## # A tibble: 3 x 219
##   Group_final Lat_deg_mean Lat_min_mean Lat_mean Long_deg_mean Long_min_mean
##   <chr>              <dbl>        <dbl>    <dbl>         <dbl>         <dbl>
## 1 North               64.5         32.5     65.0          63.5          19.8
## 2 Offshore            65            0.4     65.0          66            22.5
## 3 South               67.2         36.5     67.9          67.5          37.9
## # … with 213 more variables: Long_mean <dbl>, bottom_mean <dbl>,
## #   ice_mean <dbl>, airT_mean <dbl>, depth_m_mean <dbl>, Chla_mean <dbl>,
## #   prDM_mean <dbl>, depSM_mean <dbl>, ZML_T_m..0.2ºC._mean <dbl>,
## #   ZML_mean <dbl>, ZML_TSP_m..0.023Kg.m3._mean <dbl>, ZE_mean <dbl>,
## #   ZCM_m..maxFluo._mean <dbl>, Water_Temp_mean <dbl>, conductivity_mean <dbl>,
## #   waterT2_mean <dbl>, conductivity2_mean <dbl>, oxygen_mean <dbl>,
## #   oxygen2_mean <dbl>, fluorescence_mean <dbl>, beamTrans_mean <dbl>,
## #   PAR_mean <dbl>, PAR2_mean <dbl>, latitude_mean <dbl>, longitude_mean <dbl>,
## #   Time_mean <dbl>, altM_mean <dbl>, spar_mean <dbl>, timeS_mean <dbl>,
## #   scan_mean <dbl>, salinity_mean <dbl>, salinity2_mean <dbl>,
## #   oxygen_saturation_mean <dbl>, nbin_mean <dbl>, bprod_mean <dbl>,
## #   pprod_Sun_mean <dbl>, pprod_PAR_mean <dbl>, pprod_Sun_20_mean <dbl>,
## #   Pprod_PAR_20_mean <dbl>, Pprod_PAR_20_stdev_mean <dbl>, NH4_mean <dbl>,
## #   NO2_NO3_mean <dbl>, PO4_mean <dbl>, Total_DIN_mean <dbl>,
## #   N_P_Ratio_mean <dbl>, Time_feeding_mean <dbl>, ANAN_mL_mean <dbl>,
## #   PNAN_mL_mean <dbl>, MNAN_mL_mean <dbl>, HNAN_mL_mean <dbl>,
## #   Bact_mL_mean <dbl>, usphere_mL_mean <dbl>, usphere_mixo_mean <dbl>,
## #   usphere_hetero_mean <dbl>, mixo_act_mean <dbl>, mixo_maybe_act_mean <dbl>,
## #   phyto_act_mean <dbl>, hetero_act_mean <dbl>, bprod_stdev_mean <dbl>,
## #   pprod_Sun_stdev_mean <dbl>, pprod_PAR_stdev_mean <dbl>, TNAN_mean <dbl>,
## #   PNAN_percent_mean <dbl>, MNAN_percent_mean <dbl>, HNAN_percent_mean <dbl>,
## #   ANAN_percent_mean <dbl>, bac_usphere_ratio_mean <dbl>,
## #   MNAN_bacterivory_mean <dbl>, HNAN_bacterivory_mean <dbl>,
## #   MNAN_SS_mean <dbl>, HNAN_SS_mean <dbl>, MNAN_SS_perday_mean <dbl>,
## #   HNAN_SS_perday_mean <dbl>, Total_SS_mean <dbl>, MNAN_SS_percent_mean <dbl>,
## #   HNAN_SS_percent_mean <dbl>, Total_act_mean <dbl>,
## #   mixo_act_percent_mean <dbl>, mixo_maybe_act_percent_mean <dbl>,
## #   phyto_act_percent_mean <dbl>, Mixo_PP_mean <dbl>, Mixo_maybe_PP_mean <dbl>,
## #   Phyto_PP_mean <dbl>, Mixo_PP_sd_mean <dbl>, Mixo_maybe_PP_sd_mean <dbl>,
## #   Phyto_PP_sd_mean <dbl>, Mixo_PP_potential_mean <dbl>,
## #   Mixo_PP_potential_sd_mean <dbl>, bac_biomass_avg_mean <dbl>,
## #   Carbon_biomass_perML_mean <dbl>, MNAN_C_removed_mean <dbl>,
## #   HNAN_C_removed_mean <dbl>, MNAN_C_removed_percell_mean <dbl>,
## #   HNAN_C_removed_percell_mean <dbl>, Mixo_PP_percell_mean <dbl>,
## #   MNAN_percent_bprod_removed_mean <dbl>,
## #   HNAN_percent_bprod_removed_mean <dbl>,
## #   hetero_act_percent_forfeed_mean <dbl>,
## #   hetero_act_feeding_C_redo_mean <dbl>, mixo_act_feeding_redo_mean <dbl>, …
# Mean/sd summary without an explicit group_by(): the table's existing grouping
# is used, which per the recorded output yields one row per Group/station.
summarise_if(
  ant_community_final_PP_calc_grp_for_sum,
  is.numeric,
  .funs = list(mean = mean, sd = sd),
  na.rm = TRUE
)
## # A tibble: 9 x 220
## # Groups:   Group [5]
##   Group   station Lat_deg_mean Lat_min_mean Lat_mean Long_deg_mean Long_min_mean
##   <fct>   <fct>          <dbl>        <dbl>    <dbl>         <dbl>         <dbl>
## 1 Gerlac… A                 64         32.4     64.5            62          22.4
## 2 Palmer  C                 64         53.7     64.9            64          12.1
## 3 Grandi… E                 65         14.3     65.2            64          11.9
## 4 Grandi… G                 65         29.5     65.5            64          32.8
## 5 Offsho… H                 65          0.4     65.0            66          22.5
## 6 Maguer… J                 67         47.3     67.8            68          39.3
## 7 Maguer… K                 67         39.5     67.7            67          55.9
## 8 Maguer… L                 67         56.4     67.9            67          39.2
## 9 Maguer… M                 68          2.8     68.0            68          17.3
## # … with 213 more variables: Long_mean <dbl>, bottom_mean <dbl>,
## #   ice_mean <dbl>, airT_mean <dbl>, depth_m_mean <dbl>, Chla_mean <dbl>,
## #   prDM_mean <dbl>, depSM_mean <dbl>, ZML_T_m..0.2ºC._mean <dbl>,
## #   ZML_mean <dbl>, ZML_TSP_m..0.023Kg.m3._mean <dbl>, ZE_mean <dbl>,
## #   ZCM_m..maxFluo._mean <dbl>, Water_Temp_mean <dbl>, conductivity_mean <dbl>,
## #   waterT2_mean <dbl>, conductivity2_mean <dbl>, oxygen_mean <dbl>,
## #   oxygen2_mean <dbl>, fluorescence_mean <dbl>, beamTrans_mean <dbl>,
## #   PAR_mean <dbl>, PAR2_mean <dbl>, latitude_mean <dbl>, longitude_mean <dbl>,
## #   Time_mean <dbl>, altM_mean <dbl>, spar_mean <dbl>, timeS_mean <dbl>,
## #   scan_mean <dbl>, salinity_mean <dbl>, salinity2_mean <dbl>,
## #   oxygen_saturation_mean <dbl>, nbin_mean <dbl>, bprod_mean <dbl>,
## #   pprod_Sun_mean <dbl>, pprod_PAR_mean <dbl>, pprod_Sun_20_mean <dbl>,
## #   Pprod_PAR_20_mean <dbl>, Pprod_PAR_20_stdev_mean <dbl>, NH4_mean <dbl>,
## #   NO2_NO3_mean <dbl>, PO4_mean <dbl>, Total_DIN_mean <dbl>,
## #   N_P_Ratio_mean <dbl>, Time_feeding_mean <dbl>, ANAN_mL_mean <dbl>,
## #   PNAN_mL_mean <dbl>, MNAN_mL_mean <dbl>, HNAN_mL_mean <dbl>,
## #   Bact_mL_mean <dbl>, usphere_mL_mean <dbl>, usphere_mixo_mean <dbl>,
## #   usphere_hetero_mean <dbl>, mixo_act_mean <dbl>, mixo_maybe_act_mean <dbl>,
## #   phyto_act_mean <dbl>, hetero_act_mean <dbl>, bprod_stdev_mean <dbl>,
## #   pprod_Sun_stdev_mean <dbl>, pprod_PAR_stdev_mean <dbl>, TNAN_mean <dbl>,
## #   PNAN_percent_mean <dbl>, MNAN_percent_mean <dbl>, HNAN_percent_mean <dbl>,
## #   ANAN_percent_mean <dbl>, bac_usphere_ratio_mean <dbl>,
## #   MNAN_bacterivory_mean <dbl>, HNAN_bacterivory_mean <dbl>,
## #   MNAN_SS_mean <dbl>, HNAN_SS_mean <dbl>, MNAN_SS_perday_mean <dbl>,
## #   HNAN_SS_perday_mean <dbl>, Total_SS_mean <dbl>, MNAN_SS_percent_mean <dbl>,
## #   HNAN_SS_percent_mean <dbl>, Total_act_mean <dbl>,
## #   mixo_act_percent_mean <dbl>, mixo_maybe_act_percent_mean <dbl>,
## #   phyto_act_percent_mean <dbl>, Mixo_PP_mean <dbl>, Mixo_maybe_PP_mean <dbl>,
## #   Phyto_PP_mean <dbl>, Mixo_PP_sd_mean <dbl>, Mixo_maybe_PP_sd_mean <dbl>,
## #   Phyto_PP_sd_mean <dbl>, Mixo_PP_potential_mean <dbl>,
## #   Mixo_PP_potential_sd_mean <dbl>, bac_biomass_avg_mean <dbl>,
## #   Carbon_biomass_perML_mean <dbl>, MNAN_C_removed_mean <dbl>,
## #   HNAN_C_removed_mean <dbl>, MNAN_C_removed_percell_mean <dbl>,
## #   HNAN_C_removed_percell_mean <dbl>, Mixo_PP_percell_mean <dbl>,
## #   MNAN_percent_bprod_removed_mean <dbl>,
## #   HNAN_percent_bprod_removed_mean <dbl>,
## #   hetero_act_percent_forfeed_mean <dbl>,
## #   hetero_act_feeding_C_redo_mean <dbl>, mixo_act_feeding_redo_mean <dbl>, …
# Mean and sd over all rows of the summary table (stations and depths pooled)
# for MNAN_SS_perday, MNAN_C_removed and HNAN_SS_perday. No na.rm is passed;
# the recorded results are non-NA.
mean(ant_community_final_PP_calc_grp_for_sum$MNAN_SS_perday)
## [1] 12.82745
sd(ant_community_final_PP_calc_grp_for_sum$MNAN_SS_perday)
## [1] 8.805309
mean(ant_community_final_PP_calc_grp_for_sum$MNAN_C_removed)
## [1] 0.1825041
sd(ant_community_final_PP_calc_grp_for_sum$MNAN_C_removed)
## [1] 0.1236838
mean(ant_community_final_PP_calc_grp_for_sum$HNAN_SS_perday)
## [1] 4.969584
sd(ant_community_final_PP_calc_grp_for_sum$HNAN_SS_perday)
## [1] 2.094155
# Mean of HNAN_C_removed, to pair with the sd reported just below (the series
# above reports mean + sd per variable, but this pair took the mean of a
# different column). Re-knit to record its value.
mean(ant_community_final_PP_calc_grp_for_sum$HNAN_C_removed)
# Mixo_PP_potential contains NAs, so the pooled mean needs na.rm = TRUE;
# without it the call returned NA.
mean(ant_community_final_PP_calc_grp_for_sum$Mixo_PP_potential, na.rm = TRUE)
sd(ant_community_final_PP_calc_grp_for_sum$HNAN_C_removed)
## [1] 0.03668775
# Depth-specific mean and sd (NAs dropped) of Phyto_PP and Mixo_PP_potential:
# first the "Surf" rows, then the "DCM" rows, for each variable.
mean(filter(ant_community_final_PP_calc_grp_for_sum, depth == "Surf")$Phyto_PP, na.rm = TRUE)
## [1] 0.4073203
sd(filter(ant_community_final_PP_calc_grp_for_sum, depth == "Surf")$Phyto_PP, na.rm = TRUE)
## [1] 0.4818255
mean(filter(ant_community_final_PP_calc_grp_for_sum, depth == "DCM")$Phyto_PP, na.rm = TRUE)
## [1] 0.3712807
sd(filter(ant_community_final_PP_calc_grp_for_sum, depth == "DCM")$Phyto_PP, na.rm = TRUE)
## [1] 0.3073572
mean(filter(ant_community_final_PP_calc_grp_for_sum, depth == "Surf")$Mixo_PP_potential, na.rm = TRUE)
## [1] 1.12518
sd(filter(ant_community_final_PP_calc_grp_for_sum, depth == "Surf")$Mixo_PP_potential, na.rm = TRUE)
## [1] 0.7728431
mean(filter(ant_community_final_PP_calc_grp_for_sum, depth == "DCM")$Mixo_PP_potential, na.rm = TRUE)
## [1] 1.101344
sd(filter(ant_community_final_PP_calc_grp_for_sum, depth == "DCM")$Mixo_PP_potential, na.rm = TRUE)
## [1] 1.019967
# Copy of the grouped table for the production plots, with bacterial production
# and its standard deviation rescaled by the same factor (24 / 1000000 * 1500)
# that is applied to bprod for the summary table.
ant_community_final_PP_calc_grp_for_production_plots <-
  ant_community_final_PP_calc_grp

ant_community_final_PP_calc_grp_for_production_plots[["bprod"]] <-
  ant_community_final_PP_calc_grp_for_production_plots[["bprod"]] * 24 / 1000000 * 1500

ant_community_final_PP_calc_grp_for_production_plots[["bprod_stdev"]] <-
  ant_community_final_PP_calc_grp_for_production_plots[["bprod_stdev"]] * 24 / 1000000 * 1500

# Dodged bar chart of bacterial production per station, filled by depth.
# Stations N, O, P, Q and the "Bucket" depth are left out.
Bprod_final_bar <- ant_community_final_PP_calc_grp_for_production_plots %>%
  dplyr::filter(!station %in% c("N", "O", "P", "Q"), depth != "Bucket") %>%
  ggplot() +
  geom_col(
    aes(x = station, y = bprod, fill = depth),
    position = position_dodge()
  ) +
  labs(x = "Station", y = "Bacterial Production", fill = "Depth") +
  scale_fill_manual(values = c("blue3", "green4")) +
  theme_bw()

Bprod_final_bar

# Dodged bar chart of primary production (pprod_Sun) per station, filled by
# depth. Stations N, O, P, Q and the "Bucket" depth are left out.
Total_PP_final_bar <- ant_community_final_PP_calc_grp_for_production_plots %>%
  dplyr::filter(!station %in% c("N", "O", "P", "Q"), depth != "Bucket") %>%
  ggplot() +
  geom_col(
    aes(x = station, y = pprod_Sun, fill = depth),
    position = position_dodge()
  ) +
  labs(x = "Station", y = "Primary Production", fill = "Depth") +
  scale_fill_manual(values = c("blue3", "green4")) +
  theme_bw()
#pprod_Sun  Surf
#pprod_PAR  DCM
Total_PP_final_bar
## Warning: Removed 2 rows containing missing values (`geom_col()`).

# Primary production per station with the Surf and DCM bars drawn as two
# separate layers nudged left/right of the station position. Stations N, O, P,
# Q and A are left out.
#
# Fill is mapped to `depth` and coloured by the manual scale. Writing
# fill = 'blue3' inside aes() maps a constant string to the fill scale (ggplot
# then picks default colours and labels the legend "blue3"/"green4") instead
# of setting the colour; the manual scale restores the blue3/green4 palette
# used by the other production plots.
# NOTE(review): the notes above say pprod_PAR is the DCM variable, yet the DCM
# layer plots pprod_Sun -- confirm which column is intended.
Total_PP_final_bar <- ggplot(
  filter(
    ant_community_final_PP_calc_grp_for_production_plots,
    !station %in% c("N", "O", "P", "Q", "A"),
    depth == "Surf"
  )
) +
  geom_col(
    aes(x = station, y = pprod_Sun, fill = depth),
    position = position_nudge(x = -0.225),
    width = 0.45
  ) +
  geom_col(
    data = filter(
      ant_community_final_PP_calc_grp_for_production_plots,
      !station %in% c("N", "O", "P", "Q", "A"),
      depth == "DCM"
    ),
    aes(x = station, y = pprod_Sun, fill = depth),
    position = position_nudge(x = 0.225),
    width = 0.45
  ) +
  ylab("Primary Production") +
  labs(fill = "Depth") +
  xlab("Station") +
  scale_fill_manual(values = c("blue3", "green4")) +
  theme_bw()
Total_PP_final_bar

Bprod_final_bar

# Primary production (pprod_Sun +/- pprod_Sun_stdev) per station as points with
# error bars; Surf is nudged left and DCM right of the station position.
# Stations N, O, P, Q and A are left out.
#
# Every layer maps colour to `depth` and shares one manual scale. Previously
# the error bars used color = depth while the points used color = 'blue3' /
# 'green4' inside aes(), which maps constant strings (four unrelated legend
# levels drawn in default colours) rather than setting those colours. The
# scale is the same one Bprod_point_plot uses, so both panels of the combined
# figure are coloured alike.
Total_PP_point_plot <- ggplot(
  filter(
    ant_community_final_PP_calc_grp_for_production_plots,
    !station %in% c("N", "O", "P", "Q", "A"),
    depth == "Surf"
  )
) +
  geom_errorbar(
    aes(
      x = station, y = pprod_Sun,
      ymin = pprod_Sun - pprod_Sun_stdev, ymax = pprod_Sun + pprod_Sun_stdev,
      color = depth
    ),
    position = position_nudge(x = -0.225),
    width = 0.45
  ) +
  geom_point(
    aes(x = station, y = pprod_Sun, color = depth),
    position = position_nudge(-0.225)
  ) +
  geom_errorbar(
    data = filter(
      ant_community_final_PP_calc_grp_for_production_plots,
      !station %in% c("N", "O", "P", "Q", "A"),
      depth == "DCM"
    ),
    aes(
      x = station, y = pprod_Sun,
      ymin = pprod_Sun - pprod_Sun_stdev, ymax = pprod_Sun + pprod_Sun_stdev,
      color = depth
    ),
    position = position_nudge(x = 0.225),
    width = 0.45
  ) +
  geom_point(
    data = filter(
      ant_community_final_PP_calc_grp_for_production_plots,
      !station %in% c("N", "O", "P", "Q", "A"),
      depth == "DCM"
    ),
    aes(x = station, y = pprod_Sun, color = depth),
    position = position_nudge(0.225)
  ) +
  ylab("") + xlab("") +
  labs(linetype = "Depth", color = "Functional Group") +
  scale_color_manual(values = c("blue3", "green4")) +
  theme_bw()

# Bacterial production (bprod +/- bprod_stdev) per station as dodged points
# with error bars, coloured by depth. Stations N, O, P, Q, A and the "Bucket"
# depth are left out.
Bprod_point_plot <- ant_community_final_PP_calc_grp_for_production_plots %>%
  filter(!station %in% c("N", "O", "P", "Q", "A"), depth != "Bucket") %>%
  ggplot() +
  geom_errorbar(
    aes(
      x = station, y = bprod,
      ymin = bprod - bprod_stdev, ymax = bprod + bprod_stdev,
      color = depth
    ),
    position = position_dodge(0.225)
  ) +
  geom_point(
    aes(x = station, y = bprod, color = depth),
    position = position_dodge(0.225)
  ) +
  labs(
    x = "", y = "",
    linetype = "Depth", color = "Functional Group", shape = "Depth"
  ) +
  scale_color_manual(values = c("blue3", "green4")) +
  theme_bw()

# Primary and bacterial production panels side by side, without legends.
Total_bact_PP_ggarrange <- ggarrange(
  Total_PP_point_plot,
  Bprod_point_plot,
  legend = "none"
)

Total_bact_PP_ggarrange

# Write the combined figure into the manuscript figure folder.
ggsave(
  filename = "Total_bact_PP_ggarrange.pdf",
  plot = Total_bact_PP_ggarrange,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
# Re-apportion total abundance (TNAN) among the functional groups according to
# each group's share of Total_act; MNAN uses mixo_act + mixo_maybe_act.
ant_community_final_PP_calc_grp_perc_read_abund <-
  ant_community_final_PP_calc_grp %>%
  mutate(
    PNAN_mL_PerRead = TNAN * (phyto_act / Total_act),
    MNAN_mL_PerRead = TNAN * ((mixo_act + mixo_maybe_act) / Total_act),
    HNAN_mL_PerRead = TNAN * (hetero_act / Total_act)
  )

# %read-based abundance per station, dodged by depth: PNAN, MNAN, HNAN.
ggplot(ant_community_final_PP_calc_grp_perc_read_abund) +
  geom_col(aes(x = station, y = PNAN_mL_PerRead, fill = depth), position = "dodge") +
  scale_fill_manual(values = c("blue3", "green4")) +
  theme_bw()

ggplot(ant_community_final_PP_calc_grp_perc_read_abund) +
  geom_col(aes(x = station, y = MNAN_mL_PerRead, fill = depth), position = "dodge") +
  scale_fill_manual(values = c("blue3", "green4")) +
  theme_bw()

ggplot(ant_community_final_PP_calc_grp_perc_read_abund) +
  geom_col(aes(x = station, y = HNAN_mL_PerRead, fill = depth), position = "dodge") +
  scale_fill_manual(values = c("blue3", "green4")) +
  theme_bw()

# The same three plots are drawn a second time (kept as in the original run).
ggplot(ant_community_final_PP_calc_grp_perc_read_abund) +
  geom_col(aes(x = station, y = PNAN_mL_PerRead, fill = depth), position = "dodge") +
  scale_fill_manual(values = c("blue3", "green4")) +
  theme_bw()

ggplot(ant_community_final_PP_calc_grp_perc_read_abund) +
  geom_col(aes(x = station, y = MNAN_mL_PerRead, fill = depth), position = "dodge") +
  scale_fill_manual(values = c("blue3", "green4")) +
  theme_bw()

ggplot(ant_community_final_PP_calc_grp_perc_read_abund) +
  geom_col(aes(x = station, y = HNAN_mL_PerRead, fill = depth), position = "dodge") +
  scale_fill_manual(values = c("blue3", "green4")) +
  theme_bw()

# Activity against counted abundance for each functional group, and
# heterotrophic activity against bacterial production, coloured by depth.
ggplot(ant_community_final_PP_calc_grp_perc_read_abund) +
  geom_point(aes(x = phyto_act, y = PNAN_mL, color = depth))

ggplot(ant_community_final_PP_calc_grp_perc_read_abund) +
  geom_point(aes(x = mixo_act, y = MNAN_mL, color = depth))

ggplot(ant_community_final_PP_calc_grp_perc_read_abund) +
  geom_point(aes(x = hetero_act, y = HNAN_mL, color = depth))

ggplot(ant_community_final_PP_calc_grp_perc_read_abund) +
  geom_point(aes(x = hetero_act, y = bprod, color = depth))

The patterns of the percent abundance generally do not follow the overall trends of the abundance measures estimated via microscopy. It elevates the MNAN abundance to what we expect based on the amount of RNA reads estimated from JD's paper; however, it modifies the trends for the HNAN abundance data. There was no latitudinal signal, whereas the %read estimates double the HNANs and create a strong latitudinal signal. While this would be consistent with the bacterial production data, this is not what I observed, and there is evidence of decoupling of abundance and activity metrics. I do not believe this analysis to be fruitful or worthwhile, nor an accurate representation of the amount of HNANs or really any NAN functional group.

If I provide new estimates for abundance based on %reads, do I then include estimates of environmental parameters for the new abundances? I need to think on this…

# Two-sample Kolmogorov-Smirnov tests on the raw (un-normalized) values:
# carbon removed (MNAN_C_removed / HNAN_C_removed) against the corresponding
# *_redo columns that are used as the "percentRead" vectors further down.
#I need to fix the bprod % read estimates before I can properly compare the distributions.
#ks.test(ant_community_final_PP_calc_grp$HNAN_percent_bprod_removed, ant_community_final_PP_calc_grp$hetero_act_feeding_C_percent_bprod)
#I can't compare the %bprod for HNANs....the scale is off for one of them.
ks.test(ant_community_final_PP_calc_grp$MNAN_C_removed, ant_community_final_PP_calc_grp$mixo_act_feeding_redo)
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  ant_community_final_PP_calc_grp$MNAN_C_removed and ant_community_final_PP_calc_grp$mixo_act_feeding_redo
## D = 0.22222, p-value = 0.781
## alternative hypothesis: two-sided
ks.test(ant_community_final_PP_calc_grp$HNAN_C_removed, ant_community_final_PP_calc_grp$hetero_act_feeding_C_redo)
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  ant_community_final_PP_calc_grp$HNAN_C_removed and ant_community_final_PP_calc_grp$hetero_act_feeding_C_redo
## D = 0.5, p-value = 0.02075
## alternative hypothesis: two-sided
# Compare the standardized distributions: z-scoring each vector first removes
# differences in mean and spread, so the KS test only reflects distribution
# shape. Quick vectors for the comparison:
# MNAN vectors
MNAN_C_removed_vector <- ant_community_final_PP_calc_grp$MNAN_C_removed
MNAN_C_removed_percentRead_vector <- ant_community_final_PP_calc_grp$mixo_act_feeding_redo
# HNAN vectors
HNAN_C_removed_vector <- ant_community_final_PP_calc_grp$HNAN_C_removed
HNAN_C_removed_percentRead_vector <- ant_community_final_PP_calc_grp$hetero_act_feeding_C_redo

# Normalize everything: subtract the mean and divide by the sd (z-score).
MNAN_C_removed_vector_norm <-
  (MNAN_C_removed_vector - mean(MNAN_C_removed_vector)) / sd(MNAN_C_removed_vector)
MNAN_C_removed_percentRead_vector_norm <-
  (MNAN_C_removed_percentRead_vector - mean(MNAN_C_removed_percentRead_vector)) /
  sd(MNAN_C_removed_percentRead_vector)

HNAN_C_removed_vector_norm <-
  (HNAN_C_removed_vector - mean(HNAN_C_removed_vector)) / sd(HNAN_C_removed_vector)
HNAN_C_removed_percentRead_vector_norm <-
  (HNAN_C_removed_percentRead_vector - mean(HNAN_C_removed_percentRead_vector)) /
  sd(HNAN_C_removed_percentRead_vector)

HNAN_ks_test <- ks.test(HNAN_C_removed_vector_norm, HNAN_C_removed_percentRead_vector_norm)

# Both MNAN inputs must be the normalized vectors. The earlier call passed the
# raw MNAN_C_removed_percentRead_vector as the second argument, i.e. it
# compared z-scores with un-normalized values. The MNAN result recorded below
# (D = 0.55556, p-value = 0.006669) comes from that call and must be
# regenerated before it is interpreted.
MNAN_ks_test <- ks.test(MNAN_C_removed_vector_norm, MNAN_C_removed_percentRead_vector_norm)

HNAN_ks_test
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  HNAN_C_removed_vector_norm and HNAN_C_removed_percentRead_vector_norm
## D = 0.11111, p-value = 1
## alternative hypothesis: two-sided
MNAN_ks_test
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  MNAN_C_removed_vector_norm and MNAN_C_removed_percentRead_vector
## D = 0.55556, p-value = 0.006669
## alternative hypothesis: two-sided
# Collect the two KS test results (statistic and p-value) into a small table
# for the last figure and write it to "ks_table.csv" in the working directory.
# Before going all the way with this, the same approach is to be tried on the
# abundance data to check for consistency with the HNAN bacterivory result.
ks_matrix <- matrix(
  c(
    MNAN_ks_test$statistic, MNAN_ks_test$p.value,
    HNAN_ks_test$statistic, HNAN_ks_test$p.value
  ),
  nrow = 2,
  ncol = 2,
  byrow = TRUE,
  dimnames = list(
    c("MNAN Bacterivory", "HNAN Bacterivory"),
    c("Statistic", "P-value")
  )
)

ks_df <- as.data.frame(ks_matrix)

write.csv(ks_df, file = "ks_table.csv")

Results from the K-S test have some similarities to the % read abundance. The K-S test is significant for HNANs, signifying there is a change to the distribution, but the MNANs remain unchanged. However, what kind of change is creating the significance? Is it the change in shape, or is it the overall reduction in feeding? I believe it's the former. This is likely resulting from a slight suppression of the HNAN feeding. Not sure what to do from here honestly… Do I change the paper to be a comparison between the 2 techniques and present both as potentially informative? Am I just a bad counter?

KS tests assess impacts of both variance and scale, or put another way: shape and magnitude. If the means are so different it will come up as significantly different. If I normalize the data, I will be removing the impacts of the scale (effect size) and only looking at the shape of the distributions. I will talk more with JD about this later on this week.

Post-normalization KS tests: the general trends switch. HNANs are no longer significantly different from each other… but the MNAN distributions have significantly changed. What this tells me is that the effect sizes (the means) of the distributions are significantly different between the two methods, but the shapes of the distributions are not different. Therefore, the relationships inferred via environmental variable regression are not impacted. However, the MNAN shape is significantly different, while the means are not. This may be due to several factors: high variance in MNAN feeding estimates of the initial method, or a larger effect size with the smaller impact of additional feeding attributed to MNANs. Summary: shapes are different for MNANs and not for HNANs. Means (scale) are different for HNANs and not for MNANs. Distribution shape is more important here when it comes to inference of influential vars. Because there are very few HNANs in the % reads, a change in scale is not surprising, and I believe it to be less important or even negligible, but probably worth mentioning. Caveat: the recorded MNAN test output lists its inputs as MNAN_C_removed_vector_norm and MNAN_C_removed_percentRead_vector, i.e. the second vector was not normalized, so the MNAN conclusions here should be re-checked with both vectors normalized.

# Reshape to long format for the stacked bar plots: keep the station/depth
# identifiers plus the activity and abundance columns (the grouping variable
# `Group` is carried along automatically, see the message below).
ant_community_final_PP_calc_grp_FORmelt <- ant_community_final_PP_calc_grp %>%
  select(
    station, depth,
    mixo_act, phyto_act, hetero_act, mixo_maybe_act,
    MNAN_mL, PNAN_mL, HNAN_mL
  )
## Adding missing grouping variables: `Group`

# Long table of the activity columns, one row per station/depth/group/variable.
ant_community_final_PP_calc_grp_melt_PercentRead <- melt(
  as.data.frame(ant_community_final_PP_calc_grp_FORmelt),
  id.vars = c("station", "depth", "Group"),
  measure.vars = c("mixo_act", "mixo_maybe_act", "phyto_act", "hetero_act")
)

# Bars are filled to 100% so each station shows relative proportions.
Percent_read_stacked_plot <- ggplot(ant_community_final_PP_calc_grp_melt_PercentRead) +
  geom_col(aes(x = station, y = value, fill = variable), position = "fill") +
  labs(y = "Percentage of Reads", fill = "Functional Group") +
  theme_bw()

Percent_read_stacked_plot

# Same reshaping and plot for the abundance columns.
ant_community_final_PP_calc_grp_melt_abundance <- melt(
  as.data.frame(ant_community_final_PP_calc_grp_FORmelt),
  id.vars = c("station", "depth", "Group"),
  measure.vars = c("MNAN_mL", "PNAN_mL", "HNAN_mL")
)

Abundance_stacked_plot <- ggplot(ant_community_final_PP_calc_grp_melt_abundance) +
  geom_col(aes(x = station, y = value, fill = variable), position = "fill") +
  labs(y = "Percentage of Abundance", fill = "Functional Group") +
  theme_bw()

Abundance_stacked_plot

# Both figures are written to the current working directory.
ggsave(filename = "Percent_read_stacked_plot.pdf", plot = Percent_read_stacked_plot)
## Saving 7 x 5 in image
ggsave(filename = "Abundance_stacked_plot.pdf", plot = Abundance_stacked_plot)
## Saving 7 x 5 in image
#list of the Objects needed and their descriptions
#PNAN abundance plot - PNAN_plot_final_bar
#PNAN activity plot - PNAN_PP_plot
#MNAN abundance plot - MNAN_plot_final_bar
#MNAN PP activity plot - MNAN_PP_potential_plot
#MNAN bacterivory plot - MNAN_C_removed_plot
#% Carbon budget maps - done on home PC - won't be in this script

# Abundance next to primary-production activity, sharing one legend at the
# bottom: first PNAN, then MNAN; the MNAN bacterivory plot is shown on its own.
ggarrange(
  plotlist = list(PNAN_plot_final_bar, PNAN_PP_plot),
  common.legend = TRUE,
  legend = "bottom"
)

ggarrange(
  plotlist = list(MNAN_plot_final_bar, MNAN_PP_potential_plot),
  common.legend = TRUE,
  legend = "bottom"
)

MNAN_C_removed_plot

#He would want to remove station H from the graphs to make the trend more obvious for the report 
#Need to remake the plots filtering out station H as well as the others.
#Abundance
# Abundance bar charts for the report version of the figures: station H is
# dropped in addition to N, O, P, Q, and "Bucket" samples are excluded.
PNAN_plot_final_bar_BOB <- ant_community_final_calc %>%
  filter(!station %in% c("N", "O", "P", "Q", "H"), depth != "Bucket") %>%
  ggplot() +
  geom_col(aes(x = station, y = PNAN_mL, fill = depth), position = position_dodge()) +
  labs(x = "", y = expression("PNAN"~ "ml"^-1), fill = "Depth") +
  scale_fill_manual(values = c("blue3", "green4")) +
  theme_bw()

MNAN_plot_final_bar_BOB <- ant_community_final_calc %>%
  filter(!station %in% c("N", "O", "P", "Q", "H"), depth != "Bucket") %>%
  ggplot() +
  geom_col(aes(x = station, y = MNAN_mL, fill = depth), position = position_dodge()) +
  labs(x = "", y = expression("MNAN"~ "ml"^-1), fill = "Depth") +
  scale_fill_manual(values = c("blue3", "green4")) +
  theme_bw()
#PP Activity
# Report versions of the activity plots (station H removed as well).
# Two fixes relative to the earlier code:
#  * the axis-label unit is written "d"^-1 so plotmath renders a superscript;
#    "d"~-1 printed a literal "d -1";
#  * points get the same position_dodge(0.1) as their error bars, so each point
#    sits on its own bar instead of between the two depths.

# PNAN primary production (Phyto_PP +/- Phyto_PP_sd); stations N, O, P, Q, A, H
# and "Bucket" samples excluded.
PNAN_PP_plot_BOB <- ggplot(filter(ant_community_final_PP_calc, !station %in% c("N", "O","P","Q","A","H"), !depth == "Bucket"))+
  geom_errorbar(aes(x = station, y = Phyto_PP, ymin = Phyto_PP - Phyto_PP_sd, ymax = Phyto_PP +  Phyto_PP_sd, color = depth), position = position_dodge(0.1))+
  geom_point(aes(x = station, y = Phyto_PP, color = depth), position = position_dodge(0.1))+
  ylab(expression("Primary Production (ug C"~ "L"^-1~"d"^-1~")"))+ xlab("Station")+ labs(linetype = "Depth", color = "Functional Group", shape = "Depth")+scale_color_manual(values = c("blue3","green4"))+theme_bw()

# MNAN potential primary production (Mixo_PP_potential +/- its sd); same
# station/depth exclusions as above.
MNAN_PP_potential_plot_BOB <- ggplot(filter(ant_community_final_PP_calc, !station %in% c("N", "O","P","Q","A","H"), !depth == "Bucket"))+
  geom_errorbar(aes(x = station, y = Mixo_PP_potential, ymin = Mixo_PP_potential - Mixo_PP_potential_sd, ymax = Mixo_PP_potential +  Mixo_PP_potential_sd, color = depth), position = position_dodge(0.1))+
  geom_point(aes(x = station, y = Mixo_PP_potential, color = depth), position = position_dodge(0.1))+
  ylab(expression("Primary Production (ug C"~ "L"^-1~"d"^-1~")"))+ xlab("")+ labs(linetype = "Depth", color = "Functional Group", shape = "Depth")+scale_color_manual(values = c("blue3","green4"))+theme_bw()

#MNAN bacterivory
# Box plot of MNAN_C_removed per station and depth; stations N, O, P, Q, H and
# "Bucket" samples excluded.
MNAN_C_removed_plot_BOB <- ggplot(filter(ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA, !station %in% c("N", "O","P","Q","H"), !depth == "Bucket"))+
   geom_boxplot(aes(x = station, y = MNAN_C_removed, fill = depth))+labs(fill = "Depth")+xlab("Station")+scale_fill_manual(values = c("blue3","green4"))+theme_bw()+ylab(expression("Carbon Consumed (ug C"~ "L"^-1~"d"^-1~")"))

#PNAN_plot_final_bar_BOB
#MNAN_plot_final_bar_BOB
# Two-panel figure for PNAN: abundance bars next to primary production,
# sharing one legend placed underneath.
PNAN_ggarrange_BOB <- ggarrange(
  PNAN_plot_final_bar_BOB,
  PNAN_PP_plot_BOB,
  common.legend = TRUE,
  legend = "bottom"
)

# Same layout for MNAN: abundance bars next to potential primary production.
MNAN_ggarrange_BOB <- ggarrange(
  MNAN_plot_final_bar_BOB,
  MNAN_PP_potential_plot_BOB,
  common.legend = TRUE,
  legend = "bottom"
)


MNAN_C_removed_plot_BOB

# Write the three BOB figures to the manuscript figure folder as PDFs.
# No width/height is given, so ggsave falls back to its default size.
ggsave(
  filename = "PNAN_ggarrange_BOB.pdf",
  plot = PNAN_ggarrange_BOB,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
ggsave(
  filename = "MNAN_ggarrange_BOB.pdf",
  plot = MNAN_ggarrange_BOB,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
ggsave(
  filename = "MNAN_C_removed_plot_BOB.pdf",
  plot = MNAN_C_removed_plot_BOB,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
# Quick look: per-cell MNAN carbon removal by station, coloured by depth,
# using the grouped data frame.
ant_community_final_PP_calc_grp %>%
  ggplot() +
  geom_point(aes(x = station, y = MNAN_C_removed_percell, color = depth))

#There are some problems with the grouped data, as there are some NaN artifacts which weren't removed by the na.rm argument.

#There's no calc for per cell PP
# Add per-cell primary production (PP divided by abundance per mL * 1000) for
# MNAN and PNAN, plus the percentage split of the per-cell MNAN carbon budget
# between potential PP and bacterivory.
ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL <- mutate(
  ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA,
  MNAN_C_PP_percell_pot = Mixo_PP_potential / (MNAN_mL * 1000),
  PNAN_C_PP_percell = Phyto_PP / (PNAN_mL * 1000),
  MNAN_percent_PP_percell =
    MNAN_C_PP_percell_pot / (MNAN_C_PP_percell_pot + MNAN_C_removed_percell) * 100,
  MNAN_percent_bacterivory_percell =
    MNAN_C_removed_percell / (MNAN_C_PP_percell_pot + MNAN_C_removed_percell) * 100
)

#Filter out every situation where MNAN abundance is 0.
# Instead of dropping those rows, the undefined per-cell values are set to 0.
# MNAN_C_PP_percell_pot is a division by MNAN abundance, so a zero abundance
# yields NaN (0/0) or +/-Inf (x/0).
# is.nan()/is.infinite() replace the old comparisons against the strings
# "NaN"/"Inf", which only worked through implicit numeric-to-character coercion
# and did not catch -Inf. NA values are left untouched, as before.
ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL$MNAN_C_removed_percell[
  is.nan(ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL$MNAN_C_removed_percell)
] <- 0

ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL$MNAN_C_PP_percell_pot[
  is.nan(ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL$MNAN_C_PP_percell_pot) |
    is.infinite(ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL$MNAN_C_PP_percell_pot)
] <- 0

# Mean per-cell carbon rates for every Group x station x depth combination
# (stations N, O, P, Q and "Bucket" samples excluded).
# The argument must be spelled `na.rm`: the previous `rm.na = TRUE` was silently
# absorbed by mean()'s `...`, so missing values were never dropped and any group
# containing an NA/NaN averaged to NA/NaN.
Carbon_per_cell_filter <- ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL %>%
  filter(!station %in% c("N", "O", "P", "Q"), !depth == "Bucket") %>%
  select(
    MNAN_C_removed_percell, HNAN_C_removed_percell,
    MNAN_C_PP_percell_pot, PNAN_C_PP_percell,
    Group, station, depth
  ) %>%
  group_by(Group, station, depth) %>%
  summarise_all(mean, na.rm = TRUE)

# Group means of per-cell MNAN carbon removal, by station and depth.
Carbon_per_cell_filter %>%
  ggplot() +
  geom_point(aes(x = station, y = MNAN_C_removed_percell, color = depth))

# Replicate-level distribution of per-cell MNAN carbon removal.
ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL %>%
  filter(!station %in% c("N", "O", "P", "Q"), depth != "Bucket") %>%
  ggplot() +
  geom_boxplot(aes(x = station, y = MNAN_C_removed_percell, fill = depth))

# Replicate-level distribution of per-cell HNAN carbon removal.
ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL %>%
  filter(!station %in% c("N", "O", "P", "Q"), depth != "Bucket") %>%
  ggplot() +
  geom_boxplot(aes(x = station, y = HNAN_C_removed_percell, fill = depth))

# Replicate-level distribution of per-cell potential MNAN primary production.
ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL %>%
  filter(!station %in% c("N", "O", "P", "Q"), depth != "Bucket") %>%
  ggplot() +
  geom_boxplot(aes(x = station, y = MNAN_C_PP_percell_pot, fill = depth))
## Warning: Removed 5 rows containing non-finite values (`stat_boxplot()`).

# Replicate-level distribution of per-cell PNAN primary production.
ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL %>%
  filter(!station %in% c("N", "O", "P", "Q"), depth != "Bucket") %>%
  ggplot() +
  geom_boxplot(aes(x = station, y = PNAN_C_PP_percell, fill = depth))
## Warning: Removed 5 rows containing non-finite values (`stat_boxplot()`).

#There is no latitudinal gradient for MNAN activity; there could still be a correlation between environmental parameters and activity on a per-cell basis.
# Keep only the stations used for the gradient (A, N, O, P, Q and "Bucket" removed),
# then test per-cell MNAN carbon removal against latitude (Pearson).
ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL_filter <-
  ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL %>%
  filter(!station %in% c("A", "N", "O", "P", "Q"), depth != "Bucket")
cor.test(
  ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL_filter$MNAN_C_removed_percell,
  ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL_filter$latitude
)
## 
##  Pearson's product-moment correlation
## 
## data:  ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL_filter$MNAN_C_removed_percell and ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL_filter$latitude
## t = -0.84174, df = 46, p-value = 0.4043
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3935264  0.1668090
## sample estimates:
##        cor 
## -0.1231631
# Pearson correlation of per-cell potential MNAN primary production with latitude.
cor.test(
  ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL_filter$MNAN_C_PP_percell_pot,
  ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL_filter$latitude
)
## 
##  Pearson's product-moment correlation
## 
## data:  ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL_filter$MNAN_C_PP_percell_pot and ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL_filter$latitude
## t = 0.88703, df = 46, p-value = 0.3797
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1603630  0.3991094
## sample estimates:
##       cor 
## 0.1296806
# Open the filtered per-cell table in the data viewer for manual inspection.
# View() needs an interactive session, so it is skipped when the document is
# knitted or the script is run with Rscript.
if (interactive()) {
  View(ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL_filter)
}

#Neither have any correlation with latitude. Doing PCA analysis on the per cell basis may allow for estimation of environmental variables on the standardized dataset which removes latitude.

#I could redo everything after with new objects but I think that increases my technical debt or bloat. Going back and adding into the proper chunks is the better approach here.

# Per-cell MNAN carbon budget: percentage coming from potential primary production.
ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL %>%
  filter(!station %in% c("N", "O", "P", "Q"), depth != "Bucket") %>%
  ggplot() +
  geom_boxplot(aes(x = station, y = MNAN_percent_PP_percell, fill = depth))
## Warning: Removed 9 rows containing non-finite values (`stat_boxplot()`).

# Per-cell MNAN carbon budget: percentage coming from bacterivory.
ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA_per_CELL %>%
  filter(!station %in% c("N", "O", "P", "Q"), depth != "Bucket") %>%
  ggplot() +
  geom_boxplot(aes(x = station, y = MNAN_percent_bacterivory_percell, fill = depth))
## Warning: Removed 5 rows containing non-finite values (`stat_boxplot()`).

# Group-level carbon budget: primary production percentage per station.
Mixo_percent_carbon_PPandHeterodf %>%
  filter(!station %in% c("N", "O", "P", "Q"), depth != "Bucket") %>%
  ggplot() +
  geom_point(aes(x = station, y = Primary_Production_Percent, color = depth))
## Warning: Removed 2 rows containing missing values (`geom_point()`).

# Group-level carbon budget: bacterivory percentage per station.
Mixo_percent_carbon_PPandHeterodf %>%
  filter(!station %in% c("N", "O", "P", "Q"), depth != "Bucket") %>%
  ggplot() +
  geom_point(aes(x = station, y = Bacterivory_Percent, color = depth))
## Warning: Removed 2 rows containing missing values (`geom_point()`).

#Because of previous workflow problems, I'm creating a new df with the new primary production calculations
#*Poterioochromonas malhamensis* when grown with bacteria saw a 1/5 reduction in PP rates
#related to reduced chlorophyll content
#NEED to make new plots with new PP for PNANs and MNANs
#Additionally, export the data for use to create a new map for carbon % content.
#Then need to decide if it's worth using/making a supplemental figure for it.

# PP-penalty data frame, computed from
# ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA (the earlier note named
# ant_community_final_PP_calc from chunk 26 as the source).
#   Mixo_PP_min     : potential MNAN PP reduced to 1/5 (an 80% penalty).
#   Phyto_PP_max    : PNAN PP plus the 4/5 of potential MNAN PP removed above.
#   Phyto_PP_max_sd : SDs combined in quadrature (treats the two terms as
#                     independent, as the original did). The added term is 4/5
#                     of Mixo_PP_potential, so its SD is 4/5 of
#                     Mixo_PP_potential_sd, i.e. Mixo_PP_potential_sd - Mixo_PP_min_sd.
#                     The previous version used the 1/5 share (Mixo_PP_min_sd),
#                     which understated the error bars.
# NOTE(review): the comment above this chunk says "1/5 reduction", while the code
# reduces PP *to* 1/5 - confirm which one the source study reports.
ant_community_final_PP_calc_PPpenalty <- dplyr::mutate(
  ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA,
  Mixo_PP_min = Mixo_PP_potential / 5,
  Mixo_PP_min_sd = Mixo_PP_potential_sd / 5,
  Phyto_PP_max = Phyto_PP + (Mixo_PP_potential - Mixo_PP_min),
  Phyto_PP_max_sd = sqrt(Phyto_PP_sd^2 + (Mixo_PP_potential_sd - Mixo_PP_min_sd)^2)
)

#Now to make the visualizations as done in the previous chunk 26:
#One for max PNAN_PP
# Point = Phyto_PP_max, bar = +/- Phyto_PP_max_sd, coloured by depth.
# Stations N, O, P, Q, A and "Bucket" samples are excluded.
# The colour legend is titled "Depth" (colour is mapped to depth, not to a
# functional group), and the points share the error bars' dodge so they line up.
PNAN_PP_max_plot <- ggplot(
  filter(ant_community_final_PP_calc_PPpenalty, !station %in% c("N", "O", "P", "Q", "A"), depth != "Bucket"),
  aes(x = station, y = Phyto_PP_max, color = depth)
) +
  geom_errorbar(
    aes(ymin = Phyto_PP_max - Phyto_PP_max_sd, ymax = Phyto_PP_max + Phyto_PP_max_sd),
    position = position_dodge(0.1)
  ) +
  geom_point(position = position_dodge(0.1)) +
  ylab("") +
  xlab("") +
  labs(color = "Depth") +
  scale_color_manual(values = c("blue3", "green4")) +
  theme_bw()
#One for min MNAN_PP
# Point = Mixo_PP_min, bar = +/- Mixo_PP_min_sd, coloured by depth.
# Stations N, O, P, Q, A and "Bucket" samples are excluded.
# The colour legend is titled "Depth" (colour is mapped to depth, not to a
# functional group), and the points share the error bars' dodge so they line up.
MNAN_PP_min_plot <- ggplot(
  filter(ant_community_final_PP_calc_PPpenalty, !station %in% c("N", "O", "P", "Q", "A"), depth != "Bucket"),
  aes(x = station, y = Mixo_PP_min, color = depth)
) +
  geom_errorbar(
    aes(ymin = Mixo_PP_min - Mixo_PP_min_sd, ymax = Mixo_PP_min + Mixo_PP_min_sd),
    position = position_dodge(0.1)
  ) +
  geom_point(position = position_dodge(0.1)) +
  ylab("") +
  xlab("") +
  labs(color = "Depth") +
  scale_color_manual(values = c("blue3", "green4")) +
  theme_bw()

# Display the two penalty-adjusted PP plots.
PNAN_PP_max_plot

MNAN_PP_min_plot

#PNAN_max ggsave
ggsave(
  filename = "PNAN_PP_max_plot.pdf",
  plot = PNAN_PP_max_plot,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
#MNAN_min ggsave
ggsave(
  filename = "MNAN_PP_min_plot.pdf",
  plot = MNAN_PP_min_plot,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
#MNAN_potential ggsave
# NOTE(review): MNAN_PP_potential_plot is not created in this section (only the
# _BOB variant is) - presumably it comes from an earlier chunk; confirm.
ggsave(
  filename = "MNAN_PP_potential_plot.pdf",
  plot = MNAN_PP_potential_plot,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
#Now to recalculate the carbon budget using the new carbon values from the 80% efficiency PP penalty to MNANs

#From Chunk 35

# Per Group x station x depth: average the penalised MNAN PP (Mixo_PP_min) and the
# carbon removed by bacterivory, then express each as a percentage of their sum
# and keep the sum as the total MNAN carbon budget.
Mixo_percent_carbon_PPandHeterodf_PPpenalty <- ant_community_final_PP_calc_PPpenalty %>%
  select(Group, depth, station, Mixo_PP_min, MNAN_C_removed) %>%
  group_by(Group, station, depth) %>%
  summarise(
    Mixo_PP_avg = mean(Mixo_PP_min),
    Mixo_PP_sd = sd(Mixo_PP_min),
    MNAN_C_removed_avg = mean(MNAN_C_removed),
    MNAN_C_removed_sd = sd(MNAN_C_removed)
  ) %>%
  mutate(
    Primary_Production_Percent = Mixo_PP_avg / (Mixo_PP_avg + MNAN_C_removed_avg) * 100,
    Bacterivory_Percent = MNAN_C_removed_avg / (Mixo_PP_avg + MNAN_C_removed_avg) * 100,
    total_MNAN_carbon_budget = Mixo_PP_avg + MNAN_C_removed_avg
  ) %>%
  select(station, depth, Primary_Production_Percent, Bacterivory_Percent, total_MNAN_carbon_budget)
## `summarise()` has grouped output by 'Group', 'station'. You can override using the `.groups` argument.
## Adding missing grouping variables: `Group`
# Penalty-adjusted carbon budget: primary production percentage per station.
Percent_PP_carbonBudget_PPpenalty <- Mixo_percent_carbon_PPandHeterodf_PPpenalty %>%
  filter(!station %in% c("N", "O", "P", "Q"), depth != "Bucket") %>%
  ggplot() +
  geom_point(aes(x = station, y = Primary_Production_Percent, color = depth))

# Penalty-adjusted carbon budget: bacterivory percentage per station.
Percent_bacterivory_carbonBudget_PPpenalty <- Mixo_percent_carbon_PPandHeterodf_PPpenalty %>%
  filter(!station %in% c("N", "O", "P", "Q"), depth != "Bucket") %>%
  ggplot() +
  geom_point(aes(x = station, y = Bacterivory_Percent, color = depth))

#Saving the plots for lab meeting
# Both penalty-adjusted budget plots go to the manuscript figure folder as PDFs.
ggsave(
  filename = "Percent_PP_carbonBudget_PPpenalty.pdf",
  plot = Percent_PP_carbonBudget_PPpenalty,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
## Warning: Removed 2 rows containing missing values (`geom_point()`).
ggsave(
  filename = "Percent_bacterivory_carbonBudget_PPpenalty.pdf",
  plot = Percent_bacterivory_carbonBudget_PPpenalty,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
## Warning: Removed 2 rows containing missing values (`geom_point()`).
# Render the penalty-adjusted budget table (stations N, O, P, Q, A and "Bucket"
# removed) as a flextable and export it as a PNG.
Mixo_percent_carbon_PPandHeterodf_PPpenalty_table <- Mixo_percent_carbon_PPandHeterodf_PPpenalty %>%
  filter(!station %in% c("N", "O", "P", "Q", "A"), depth != "Bucket") %>%
  flextable()

save_as_image(
  Mixo_percent_carbon_PPandHeterodf_PPpenalty_table,
  "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Mixo_carbon_percent_PPpenalty_table.png"
)
## [1] "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Mixo_carbon_percent_PPpenalty_table.png"
# Filtered copy of the penalty-adjusted budget table (stations N, O, P, Q, A and
# "Bucket" removed). This used to filter Mixo_percent_carbon_PPandHeterodf, the
# table without the penalty, which contradicted the object's name.
Mixo_percent_carbon_PPandHeterodf_noredun_PPpenalty <- filter(
  Mixo_percent_carbon_PPandHeterodf_PPpenalty,
  !station %in% c("N", "O", "P", "Q", "A"),
  depth != "Bucket"
)

# Subsets of the filtered budget table: by depth, and by whether Group is "Maguerite".
DCM_filter <- Mixo_percent_carbon_PPandHeterodf_noredun %>% filter(depth == "DCM")
Surf_filter <- Mixo_percent_carbon_PPandHeterodf_noredun %>% filter(depth == "Surf")
North_filter <- Mixo_percent_carbon_PPandHeterodf_noredun %>% filter(Group != "Maguerite")
South_filter <- Mixo_percent_carbon_PPandHeterodf_noredun %>% filter(Group == "Maguerite")

#Converting the flextables into regular dataframe objects and then combining in the proper format for JD to perform the PCoA analysis.

# Apply the same station/depth filter to the penalty and no-penalty budget tables.
Mixo_percent_carbon_PPandHeterodf_PPpenalty_filt <- filter(Mixo_percent_carbon_PPandHeterodf_PPpenalty, !station %in% c("N", "O", "P", "Q", "A"), !depth == "Bucket")

Mixo_percent_carbon_PPandHeterodf_table_filt <- filter(Mixo_percent_carbon_PPandHeterodf, !station %in% c("N", "O", "P", "Q", "A"), !depth == "Bucket")

# The no-penalty table keeps only its bacterivory percentage and total budget.
Mixo_percent_carbon_PPandHeterodf_table_filt <- select(Mixo_percent_carbon_PPandHeterodf_table_filt, -Primary_Production_Percent)

# cbind() below pairs rows purely by position, so stop here if the two tables do
# not list the same station/depth combinations in the same order; otherwise the
# penalty columns would be attached to the wrong samples without any warning.
stopifnot(
  nrow(Mixo_percent_carbon_PPandHeterodf_table_filt) == nrow(Mixo_percent_carbon_PPandHeterodf_PPpenalty_filt),
  identical(
    as.character(Mixo_percent_carbon_PPandHeterodf_table_filt$station),
    as.character(Mixo_percent_carbon_PPandHeterodf_PPpenalty_filt$station)
  ),
  identical(
    as.character(Mixo_percent_carbon_PPandHeterodf_table_filt$depth),
    as.character(Mixo_percent_carbon_PPandHeterodf_PPpenalty_filt$depth)
  )
)

# Penalty-adjusted columns, renamed so they do not clash with the no-penalty ones.
MNAN_bacterivory_percent_CB_vector <- Mixo_percent_carbon_PPandHeterodf_PPpenalty_filt[, c("Bacterivory_Percent", "total_MNAN_carbon_budget")]
colnames(MNAN_bacterivory_percent_CB_vector) <- c("Bacterivory_Percent_PPpenalty", "Total_MNAN_carbon_PPpenalty")

Mixo_percent_carbon_PPandHeterodf_table_filt_df <- cbind(Mixo_percent_carbon_PPandHeterodf_table_filt, MNAN_bacterivory_percent_CB_vector)

write.csv(Mixo_percent_carbon_PPandHeterodf_table_filt_df, "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Mixo_percent_carbon_PPandHeterodf_table_filt_df.csv")

Notes from adding a PP efficiency penalty minimum for the MNAN PP estimate: Reducing the amount of PP estimated by 80% drastically changed the percent carbon budget trends. It went from dominated by PP to on average 50-50 with an almost random distribution of budget percentages throughout the region. This trend of near random I expected from the initial results but I didn’t expect such a strong deviation from the PP dominance. The general trend of total carbon budget increasing as we move poleward continues with this estimate as well, which is encouraging.

But I am not sure how this impacts how I write this up.

Total carbon continues to show increases along the latitudinal gradient - good and coincides with increases in PP/bacterivory/abundance.

Random distribution of budget percentages suggests more research and sampling is needed to accurately assess the distribution of MNAN flagellates within the community. Is there a coupling of community composition and the carbon budget, then, instead of environmental parameters? If so, the environmental controls on the carbon budget could be assessed indirectly by looking at their impacts on MNAN genetic diversity/richness.

With the introduction of the penalty the MNAN community goes from mostly phototrophic to near neutral or 50/50 on average, with high variance across latitudes and depths.

Error: the calculations for the C budget used the direct measurement for assessing bacterivory by MNANs. I need to change the procedure to use the indirect measures, to stay consistent with the other estimates used for comparison.

#Now to recalculate the carbon budget using the new carbon values from the 80% efficiency PP penalty to MNANs

#From Chunk 35 - Had to fix which MNAN bacterivory estimate used

# No-penalty budget, rebuilt with mixo_act_feeding_redo as the bacterivory term.
# Per Group x station x depth: average Mixo_PP and mixo_act_feeding_redo, then
# express each as a percentage of their sum and keep the sum as the total budget.
Mixo_percent_carbon_PPandHeterodf_fix <- ant_community_final_PP_calc_PCA_filter_bacbiomass_WstA %>%
  select(Group, depth, station, Mixo_PP, mixo_act_feeding_redo) %>%
  group_by(Group, station, depth) %>%
  summarise(
    Mixo_PP_avg = mean(Mixo_PP),
    Mixo_PP_sd = sd(Mixo_PP),
    MNAN_C_removed_avg = mean(mixo_act_feeding_redo),
    MNAN_C_removed_sd = sd(mixo_act_feeding_redo)
  ) %>%
  mutate(
    Primary_Production_Percent = Mixo_PP_avg / (Mixo_PP_avg + MNAN_C_removed_avg) * 100,
    Bacterivory_Percent = MNAN_C_removed_avg / (Mixo_PP_avg + MNAN_C_removed_avg) * 100,
    total_MNAN_carbon_budget = Mixo_PP_avg + MNAN_C_removed_avg
  ) %>%
  select(station, depth, Primary_Production_Percent, Bacterivory_Percent, total_MNAN_carbon_budget)
## `summarise()` has grouped output by 'Group', 'station'. You can override using the `.groups` argument.
## Adding missing grouping variables: `Group`
####

# Penalty-adjusted budget, rebuilt with mixo_act_feeding_redo as the bacterivory term.
# Per Group x station x depth: average Mixo_PP_min and mixo_act_feeding_redo, then
# express each as a percentage of their sum and keep the sum as the total budget.
Mixo_percent_carbon_PPandHeterodf_PPpenalty_fix <- ant_community_final_PP_calc_PPpenalty %>%
  select(Group, depth, station, Mixo_PP_min, mixo_act_feeding_redo) %>%
  group_by(Group, station, depth) %>%
  summarise(
    Mixo_PP_avg = mean(Mixo_PP_min),
    Mixo_PP_sd = sd(Mixo_PP_min),
    MNAN_C_removed_avg = mean(mixo_act_feeding_redo),
    MNAN_C_removed_sd = sd(mixo_act_feeding_redo)
  ) %>%
  mutate(
    Primary_Production_Percent = Mixo_PP_avg / (Mixo_PP_avg + MNAN_C_removed_avg) * 100,
    Bacterivory_Percent = MNAN_C_removed_avg / (Mixo_PP_avg + MNAN_C_removed_avg) * 100,
    total_MNAN_carbon_budget = Mixo_PP_avg + MNAN_C_removed_avg
  ) %>%
  select(station, depth, Primary_Production_Percent, Bacterivory_Percent, total_MNAN_carbon_budget)
## `summarise()` has grouped output by 'Group', 'station'. You can override using the `.groups` argument.
## Adding missing grouping variables: `Group`
# Budget scatter plots (stations N, O, P, Q and "Bucket" excluded), coloured by depth.
# Penalty-adjusted budget: primary production percentage.
Percent_PP_carbonBudget_PPpenalty_fix <- Mixo_percent_carbon_PPandHeterodf_PPpenalty_fix %>%
  filter(!station %in% c("N", "O", "P", "Q"), depth != "Bucket") %>%
  ggplot() +
  geom_point(aes(x = station, y = Primary_Production_Percent, color = depth))

# Penalty-adjusted budget: bacterivory percentage.
Percent_bacterivory_carbonBudget_PPpenalty_fix <- Mixo_percent_carbon_PPandHeterodf_PPpenalty_fix %>%
  filter(!station %in% c("N", "O", "P", "Q"), depth != "Bucket") %>%
  ggplot() +
  geom_point(aes(x = station, y = Bacterivory_Percent, color = depth))

# No-penalty budget: primary production percentage.
Percent_PP_carbonBudget_fix <- Mixo_percent_carbon_PPandHeterodf_fix %>%
  filter(!station %in% c("N", "O", "P", "Q"), depth != "Bucket") %>%
  ggplot() +
  geom_point(aes(x = station, y = Primary_Production_Percent, color = depth))

# No-penalty budget: bacterivory percentage.
Percent_bacterivory_carbonBudget_fix <- Mixo_percent_carbon_PPandHeterodf_fix %>%
  filter(!station %in% c("N", "O", "P", "Q"), depth != "Bucket") %>%
  ggplot() +
  geom_point(aes(x = station, y = Bacterivory_Percent, color = depth))

# Display the four budget scatter plots built above: the two penalty-adjusted
# versions first, then the two no-penalty versions.
Percent_PP_carbonBudget_PPpenalty_fix
## Warning: Removed 2 rows containing missing values (`geom_point()`).

Percent_bacterivory_carbonBudget_PPpenalty_fix
## Warning: Removed 2 rows containing missing values (`geom_point()`).

Percent_PP_carbonBudget_fix
## Warning: Removed 2 rows containing missing values (`geom_point()`).

Percent_bacterivory_carbonBudget_fix
## Warning: Removed 2 rows containing missing values (`geom_point()`).

#Saving the plots for lab meeting
# Only the two penalty-adjusted "_fix" plots are written out here.
ggsave(
  filename = "Percent_PP_carbonBudget_PPpenalty_fix.pdf",
  plot = Percent_PP_carbonBudget_PPpenalty_fix,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
## Warning: Removed 2 rows containing missing values (`geom_point()`).
ggsave(
  filename = "Percent_bacterivory_carbonBudget_PPpenalty_fix.pdf",
  plot = Percent_bacterivory_carbonBudget_PPpenalty_fix,
  path = "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/"
)
## Saving 7 x 5 in image
## Warning: Removed 2 rows containing missing values (`geom_point()`).
# Render the penalty-adjusted "_fix" budget table (stations N, O, P, Q, A and
# "Bucket" removed) as a flextable and export it as a PNG.
Mixo_percent_carbon_PPandHeterodf_PPpenalty_fix_table <- Mixo_percent_carbon_PPandHeterodf_PPpenalty_fix %>%
  filter(!station %in% c("N", "O", "P", "Q", "A"), depth != "Bucket") %>%
  flextable()

save_as_image(
  Mixo_percent_carbon_PPandHeterodf_PPpenalty_fix_table,
  "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Mixo_carbon_percent_PPpenalty_fix_table.png"
)
## [1] "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Mixo_carbon_percent_PPpenalty_fix_table.png"
# Filtered copy of the penalty-adjusted "_fix" budget table (stations N, O, P, Q, A
# and "Bucket" removed). This used to filter Mixo_percent_carbon_PPandHeterodf,
# the table with neither the penalty nor the fix, contradicting the object's name.
Mixo_percent_carbon_PPandHeterodf_noredun_PPpenalty_fix <- filter(
  Mixo_percent_carbon_PPandHeterodf_PPpenalty_fix,
  !station %in% c("N", "O", "P", "Q", "A"),
  depth != "Bucket"
)

#DCM_filter <- filter(Mixo_percent_carbon_PPandHeterodf_noredun, depth == "DCM")
#Surf_filter <- filter(Mixo_percent_carbon_PPandHeterodf_noredun, depth == "Surf")
#North_filter <- filter(Mixo_percent_carbon_PPandHeterodf_noredun, Group != "Maguerite")
#South_filter <- filter(Mixo_percent_carbon_PPandHeterodf_noredun, Group == "Maguerite")

#Converting the flextables into regular dataframe objects and then combining in the proper format for JD to perform the PCoA analysis.

# Apply the same station/depth filter to the penalty and no-penalty "_fix" tables.
Mixo_percent_carbon_PPandHeterodf_PPpenalty_filt_fix <- filter(Mixo_percent_carbon_PPandHeterodf_PPpenalty_fix, !station %in% c("N", "O", "P", "Q", "A"), !depth == "Bucket")

Mixo_percent_carbon_PPandHeterodf_table_filt_fix <- filter(Mixo_percent_carbon_PPandHeterodf_fix, !station %in% c("N", "O", "P", "Q", "A"), !depth == "Bucket")

# The no-penalty table keeps only its bacterivory percentage and total budget.
Mixo_percent_carbon_PPandHeterodf_table_filt_fix <- select(Mixo_percent_carbon_PPandHeterodf_table_filt_fix, -Primary_Production_Percent)

# cbind() below pairs rows purely by position, so stop here if the two tables do
# not list the same station/depth combinations in the same order; otherwise the
# penalty columns would be attached to the wrong samples without any warning.
stopifnot(
  nrow(Mixo_percent_carbon_PPandHeterodf_table_filt_fix) == nrow(Mixo_percent_carbon_PPandHeterodf_PPpenalty_filt_fix),
  identical(
    as.character(Mixo_percent_carbon_PPandHeterodf_table_filt_fix$station),
    as.character(Mixo_percent_carbon_PPandHeterodf_PPpenalty_filt_fix$station)
  ),
  identical(
    as.character(Mixo_percent_carbon_PPandHeterodf_table_filt_fix$depth),
    as.character(Mixo_percent_carbon_PPandHeterodf_PPpenalty_filt_fix$depth)
  )
)

# Penalty-adjusted columns, renamed so they do not clash with the no-penalty ones.
MNAN_bacterivory_percent_CB_vector_fix <- Mixo_percent_carbon_PPandHeterodf_PPpenalty_filt_fix[, c("Bacterivory_Percent", "total_MNAN_carbon_budget")]
colnames(MNAN_bacterivory_percent_CB_vector_fix) <- c("Bacterivory_Percent_PPpenalty", "Total_MNAN_carbon_PPpenalty")

Mixo_percent_carbon_PPandHeterodf_table_filt_fix_df <- cbind(Mixo_percent_carbon_PPandHeterodf_table_filt_fix, MNAN_bacterivory_percent_CB_vector_fix)

write.csv(Mixo_percent_carbon_PPandHeterodf_table_filt_fix_df, "/Users/christophercarnivale/Desktop/Dissertation_data/Antarctica_community_data/Manuscript_figures/Mixo_percent_carbon_PPandHeterodf_table_filt_fix_df.csv")